APIC多核启动协议及ICR启动地址

APIC multi-core startup Protocol and ICR starting up address

我正在编写引导加载并尝试测试处理器间中断。我有以下 2 个问题阻止了我:

1、哪里可以找到启动AP的程序;

2、下发IPI时,我应该从哪里加载内存地址,告诉目标处理器从哪个内存地址开始。

感谢您的回答,请附上一个装配示例。

我从现在已经不存在的 Whosebug 文档项目中提取了这个。这最初是由 Margaret Bloom 编写的,我已经清理了她的代码。由于这不是我自己的,所以我将其标记为社区 wiki。可能有您认为有用的信息。


此示例将唤醒每个 应用程序处理器 (AP) 并使它们与 Bootstrap 处理器 ( BSP),显示他们的 LAPIC ID。

; Assemble boot sector and insert it into a 1.44MiB floppy image
;
; nasm -f bin boot.asm -o boot.bin
; dd if=/dev/zero of=disk.img bs=512 count=2880
; dd if=boot.bin of=disk.img bs=512 conv=notrunc

BITS 16
; Bootloader starts at segment:offset 07c0h:0000h
section bootloader, vstart=0000h
jmp 7c0h:__START__

__START__:
 mov ax, cs
 mov ds, ax
 mov es, ax
 mov ss, ax
 xor sp, sp
 cld

 ;Clear screen
 mov ax, 03h
 int 10h

 ;Set limit of 4GiB and base 0 for FS and GS
 call 7c0h:unrealmode

 ;Enable the APIC
 call enable_lapic

 ;Move the payload to the expected address
 mov si, payload_start_abs
 mov cx, payload_end-payload + 1
 mov di, 400h                 ;7c0h:400h = 8000h
 rep movsb


 ;Wakeup the other APs

 ;INIT
 call lapic_send_init
 mov cx, WAIT_10_ms
 call us_wait

 ;SIPI
 call lapic_send_sipi
 mov cx, WAIT_200_us
 call us_wait

 ;SIPI
 call lapic_send_sipi

 ;Jump to the payload
 jmp 0000h:8000h

 ;Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll
 ;  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll
 ;Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll

 ;CX = Wait (in ms) Max 65536 us (=0 on input)
 us_wait:
  mov dx, 80h               ;POST Diagnose port, 1us per IO
  xor si, si
  rep outsb

  ret

  WAIT_10_ms     EQU 10000
  WAIT_200_us    EQU 200

 ;Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll
 ;  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll
 ;Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll


 enable_lapic:

  ;Enable the APIC globally
  ;On P6 CPU once this flag is set to 0, it cannot be set back to 16
  ;Without an HARD RESET
  mov ecx, IA32_APIC_BASE_MSR
  rdmsr
  or ah, 08h        ;bit11: APIC GLOBAL Enable/Disable
  wrmsr

  ;Mask off lower 12 bits to get the APIC base address
  and ah, 0f0h
  mov DWORD [APIC_BASE], eax

  ;Newer processors enables the APIC through the Spurious Interrupt Vector register
  mov ecx, DWORD [fs: eax + APIC_REG_SIV]
  or ch, 01h                                ;bit8: APIC SOFTWARE enable/disable
  mov DWORD [fs: eax+APIC_REG_SIV], ecx

  ret

 ;Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll
 ;  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll
 ;Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll

 lapic_send_sipi:
  mov eax, DWORD [APIC_BASE]

  ;Destination field is set to 0 has we will use a shorthand
  xor ebx, ebx
  mov DWORD [fs: eax+APIC_REG_ICR_HIGH], ebx

  ;Vector: 08h (Will make the CPU execute instruction ad address 08000h)
  ;Delivery mode: Startup
  ;Destination mode: ignored (0)
  ;Level: ignored (1)
  ;Trigger mode: ignored (0)
  ;Shorthand: All excluding self (3)
  mov ebx, 0c4608h
  mov DWORD [fs: eax+APIC_REG_ICR_LOW], ebx  ;Writing the low DWORD sent the IPI

  ret

  ;Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll
 ;  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll
 ;Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll

 lapic_send_init:
  mov eax, DWORD [APIC_BASE]

  ;Destination field is set to 0 has we will use a shorthand
  xor ebx, ebx
  mov DWORD [fs: eax+APIC_REG_ICR_HIGH], ebx

  ;Vector: 00h
  ;Delivery mode: Startup
  ;Destination mode: ignored (0)
  ;Level: ignored (1)
  ;Trigger mode: ignored (0)
  ;Shorthand: All excluding self (3)
  mov ebx, 0c4500h
  mov DWORD [fs: eax+APIC_REG_ICR_LOW], ebx  ;Writing the low DWORD sent the IPI

  ret

 IA32_APIC_BASE_MSR    EQU    1bh

 APIC_REG_SIV        EQU    0f0h

 APIC_REG_ICR_LOW    EQU 300h
 APIC_REG_ICR_HIGH    EQU 310h

 APIC_REG_ID        EQU 20h

 ;Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll
 ;  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll
 ;Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll

 APIC_BASE            dd     00h

 ;Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll
 ;  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll
 ;Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll

unrealmode:
 lgdt [cs:GDT]

 cli

 mov eax, cr0
 or ax, 01h
 mov cr0, eax

 mov bx, 08h
 mov fs, bx
 mov gs, bx

 and ax, 0fffeh
 mov cr0, eax

 sti

 ;IMPORTAT: This call is FAR!
 ;So it can be called from everywhere
 retf

 GDT:
    dw 0fh
    dd GDT + 7c00h
    dw 00h

    dd 0000ffffh
    dd 00cf9200h

 ;Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll
 ;  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll
 ;Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll  Ll

payload_start_abs:
; payload starts at segment:offset 0800h:0000h
section payload, vstart=0000h, align=1
 payload:

  ;IMPORTANT NOTE: Here we are in a \"new\" CPU every state we set before is no
  ;more present here (except for the BSP, but we handler every processor with
  ;the same code).
 jmp 800h: __RESTART__

 __RESTART__:
  mov ax, cs
  mov ds, ax
  xor sp, sp
  cld

  ;IMPORTANT: We can't use the stack yet. Every CPU is pointing to the same stack!

  ;Get an unique id
  mov ax, WORD [counter]
  .try:
    mov bx, ax
    inc bx
    lock cmpxchg WORD [counter], bx
   jnz .try

  mov cx, ax            ;Save this unique id

  ;Stack segment = CS + unique id * 1000
  shl ax, 12
  mov bx, cs
  add ax, bx
  mov ss, ax

  ;Text buffer
  push 0b800h
  pop es

  ;Set unreal mode again
  call 7c0h:unrealmode

  ;Use GS for old variables
  mov ax, 7c0h
  mov gs, ax

  ;Calculate text row
  mov ax, cx
  mov bx, 160d           ;80 * 2
  mul bx
  mov di, ax

  ;Get LAPIC id
  mov ebx, DWORD [gs:APIC_BASE]
  mov edx, DWORD [fs:ebx + APIC_REG_ID]
  shr edx, 24d
  call itoa8

  cli
  hlt

  ;DL = Number
  ;DI = ptr to text buffer
  itoa8:
    mov bx, dx
    shr bx, 0fh
    mov al, BYTE [bx +  digits]
    mov ah, 09h
    stosw

    mov bx, dx
    and bx, 0fh
    mov al, BYTE [bx +  digits]
    mov ah, 09h
    stosw

    ret

  digits db \"0123456789abcdef\"
  counter dw 0

 payload_end:



; Boot signature is at physical offset 01feh of
; the boot sector
section bootsig, start=01feh
 dw 0aa55h

要执行两个主要步骤:

1.唤醒 AP
这是通过向所有 AP 发出 INIT-SIPI-SIPI (ISS) 序列来实现的。

BSP 将使用 shorthand All excluding self 作为目标发送 ISS 序列,从而针对所有 AP。

一个 SIPI(启动处理器间中断)被所有在收到它时唤醒的 CPUs 忽略,因此如果第一个 SIPI 足以唤醒目标,则第二个 SIPI 将被忽略处理器。出于兼容性原因,英特尔建议这样做。

一个 SIPI 包含一个 向量,这在意思上是相似的,但在实践中完全不同,到一个中断向量(a.k.a.中断号).
向量是一个 8 位数,值为 V(以 16 进制表示为 vv),这使得 CPU 开始执行物理 地址 0vv000h.
处的指令 我们将调用0vv000h 唤醒地址 (WA).
WA 被强制在 4KiB(或页面)边界。

我们将使用 08h 作为 V,然后 WA 为 08000h,引导加载程序后 400h 字节。

这将控制权交给了 AP。

2。初始化和区分 AP
在 WA 中有一个可执行代码是必要的。引导加载程序位于 7c00h,因此我们需要在页面边界重新定位一些代码。

编写有效负载时首先要记住的是,必须保护或区分对共享资源的任何访问。
一个共同的共享资源是堆栈,如果我们天真地初始化堆栈,每个AP最终都会使用同一个堆栈!

第一步是使用不同的堆栈地址,从而区分堆栈。
我们通过为每个 CPU 分配一个从零开始的唯一数字来实现这一点。这个数字,我们称之为 index,用于区分堆栈和行,CPU 将写入其 APIC ID。

每个 CPU 的堆栈地址是 800h:(index * 1000h) 给每个 AP 64KiB 的堆栈。
每个 CPU 的行号是 index,指向文本缓冲区的指针因此是 80 * 2 * index

要生成索引,lock cmpxchg 用于自动递增,return 一个 WORD。

最后的笔记
* 写入端口 80h 用于生成 1 µs 的延迟。
* unrealmode是far例程,所以唤醒后也可以调用。
* BSP也跳转到WA.

截图

来自具有 8 个处理器的 Bochs