如何编译裸机(baremetal)hello_world.c 并在 qemu-system-aarch64 上运行?

How to compile baremetal hello_world.c and run it on qemu-system-aarch64?

如题,我想在qemu-system-aarch64上编译hello_world.c程序运行。这是程序:

#include <stdio.h>
/* Program entry point: print the greeting on standard output. */
int main(void)
{
    printf("hello world!\n");
    return 0;
}

在 https://releases.linaro.org/components/toolchain/binaries/latest-7/aarch64-elf/(这是 baremetal 目录)中,我可以看到这些工具链:

folder  aarch64-elf -       
folder  aarch64-linux-gnu   -       
folder  aarch64_be-elf  -       
folder  aarch64_be-linux-gnu    -       
folder  arm-eabi    -       
folder  arm-linux-gnueabi   -       
folder  arm-linux-gnueabihf -       
folder  armeb-eabi  -       
folder  armeb-linux-gnueabi -       
folder  armeb-linux-gnueabihf   -       
folder  armv8l-linux-gnueabihf

所以我选择了 aarch64-elf(这样正确吗?)并将其安装在我的 ubuntu 16.04 机器上,并将 bin 目录添加到 PATH 中。如果我只是执行 aarch64-elf-gcc hello_world.c,我会得到 _exit、_sbrk、_write、_close、_lseek、_read、_fstat、_isatty 函数的未定义引用错误。所以我尝试添加 -spec=aem.ve-specs,它就不再报错了(我不确定这样是否正确)。然后我尝试运行 qemu:

qemu-system-aarch64 -M virt -cpu cortex-a57 -nographic -smp 1 -m 2048 -kernel a.out

它没有打印任何输出。我应该在这里更改什么?

请注意,我只有 x86/amd64 开发经验。但我认为你最初的一些假设是有缺陷的。

首先,默认情况下,简单编译出来的 C 程序无法直接在裸机上运行。您使用了 printf,编译器会在 stdio.h(标准缓冲输入/输出库)中找到它的声明。按您现在的编译方式,libc 是动态链接到您的程序中的,也就是说,您的程序在执行期间调用 printf 时会跳转到 libc。您可以用 readelf 看到这一点。

readelf --dynamic a.out | grep NEEDED
0x0000000000000001 (NEEDED)             Shared library: [libc.so.6]

Qemu模拟机器,但不模拟操作系统。您现在将动态链接的程序作为 kernel/OS 传递。如果没有 libc 库,您的内核如何知道要打印?更重要的是,它怎么知道如何做任何事情,因为基本的启动步骤不是由您的程序完成的(设置和初始化 RAM、设置设备树等)。

我假设您不想编写 ARM64 内核,而只想进入 ARM64 'bare metal' 开发。也许只是下载一个 Aarch64 Linux 发行版(例如 Debian)?

您可以创建一个虚拟驱动器,安装一个 OS,然后在该虚拟机中进行开发以获得虚拟 'bare metal' 开发 ;) 例如 this?

如果我的假设是错误的,而您确实想要进入OS开发,那么这个问题太长了,无法在这里回答。但我建议查看 tutorials and documentation.

你是对的,你可以使用 qemu-system-aarch64 来实现你的目标。 根据您确切想做的事情,您有多种选择:

  1. 使用 qemu 的 semihosting(半主机)模式,配合 gcc 的 --specs=rdimon.specs 选项和 newlib;或者使用另一个半主机库,例如 Arm Trusted Firmware 源代码中提供的库 - 以下示例使用了这种方法。

  2. 提供你自己的 syscalls.c,并使用 --specs=nosys.specs ld 选项,这样你就可以在裸机程序中使用 newlib:我建议阅读 Francesco Balducci 在 Balau 博客上的优秀文章 - 以下示例使用了这种方法。

  3. 使用一种更接近裸机的方法,例如下面描述的方法:它使用 sprintf() 格式化字符串,并通过 qemu-virt 机器的 pl011 UART 显示结果字符串。

gcc_arm64_ram.ld:

/******************************************************************************
 * @file     gcc_arm64_ram.ld
 * @brief    GNU Linker Script for an AArch64 RAM-only target (derived from the Cortex-M script gcc_arm32.ld)
 * @version  V2.0.0
 * @date     21. May 2019
 ******************************************************************************/
/*
 * Copyright (c) 2009-2019 Arm Limited. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the License); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* The only memory region: a read/write/execute RAM block.
 * __RAM_BASE and __RAM_SIZE are not defined in this script; they must be
 * assigned before this file is processed (qemu-virt-aarch64.ld sets them
 * and then INCLUDEs this file).
 */
MEMORY
{
  RAM   (rwx) : ORIGIN = __RAM_BASE, LENGTH = __RAM_SIZE
}

/* Linker script to place sections and symbol values in the single RAM
 * region declared in the MEMORY command of this file.
 *
 * It references the following symbols, which must be defined elsewhere:
 *   Reset_Handler : Entry of reset handler (defined in code)
 *   __RAM_BASE    : used as ORIGIN of the RAM region
 *   __RAM_SIZE    : used as LENGTH of the RAM region
 *   __HEAP_SIZE   : number of bytes reserved in .heap
 *   __STACK_SIZE  : number of bytes reserved in .stack
 *
 * It defines following symbols, which code can use without definition:
 *   __exidx_start
 *   __exidx_end
 *   __copy_table_start__
 *   __copy_table_end__
 *   __zero_table_start__
 *   __zero_table_end__
 *   __etext
 *   __data_start__
 *   __preinit_array_start
 *   __preinit_array_end
 *   __init_array_start
 *   __init_array_end
 *   __fini_array_start
 *   __fini_array_end
 *   __data_end__
 *   __bss_start__
 *   __bss_end__
 *   __end__
 *   end
 *   __HeapLimit
 *   __StackLimit
 *   __StackTop
 *   __stack
 */
ENTRY(Reset_Handler)

/* Output section layout. Every output section is assigned to the RAM region. */
SECTIONS
{
  /* Vectors, code, init/fini, constructor/destructor lists, read-only data
   * and exception frames, in that order. */
  .text :
  {
    KEEP(*(.vectors))
    *(.text*)

    KEEP(*(.init))
    KEEP(*(.fini))

    /* .ctors */
    *crtbegin.o(.ctors)
    *crtbegin?.o(.ctors)
    *(EXCLUDE_FILE(*crtend?.o *crtend.o) .ctors)
    *(SORT(.ctors.*))
    *(.ctors)

    /* .dtors */
    *crtbegin.o(.dtors)
    *crtbegin?.o(.dtors)
    *(EXCLUDE_FILE(*crtend?.o *crtend.o) .dtors)
    *(SORT(.dtors.*))
    *(.dtors)

    *(.rodata*)

    KEEP(*(.eh_frame*))
  } > RAM

  /*
   * SG veneers:
   * All SG veneers are placed in the special output section .gnu.sgstubs. Its start address
   * must be set, either with the command line option "--section-start" or in a linker script,
   * to indicate where to place these veneers in memory.
   * (The section itself is left commented out below.)
   */
/*
  .gnu.sgstubs :
  {
    . = ALIGN(32);
  } > RAM
*/
  /* Exception-handling tables. */
  .ARM.extab :
  {
    *(.ARM.extab* .gnu.linkonce.armextab.*)
  } > RAM

  /* __exidx_start / __exidx_end bracket the .ARM.exidx output section. */
  __exidx_start = .;
  .ARM.exidx :
  {
    *(.ARM.exidx* .gnu.linkonce.armexidx.*)
  } > RAM
  __exidx_end = .;

  /* Copy table: one entry of three LONG values per data section
   * (load address, run address, length in bytes), bracketed by
   * __copy_table_start__ / __copy_table_end__. */
  .copy.table :
  {
    . = ALIGN(16);
    __copy_table_start__ = .;
    LONG (__etext)
    LONG (__data_start__)
    LONG (__data_end__ - __data_start__)
    /* Add each additional data section here */
/*
    LONG (__etext2)
    LONG (__data2_start__)
    LONG (__data2_end__ - __data2_start__)
*/
    __copy_table_end__ = .;
  } > RAM

  /* Zero table: currently empty (the only candidate entries are commented
   * out); only the start/end symbols are emitted. */
  .zero.table :
  {
    . = ALIGN(16);
    __zero_table_start__ = .;
    /* Add each additional bss section here */
/*
    LONG (__bss2_start__)
    LONG (__bss2_end__ - __bss2_start__)
*/
    __zero_table_end__ = .;
  } > RAM

  /**
   * __etext is used below as the load address (LMA) of .data; it is the
   * current location rounded up to a 16-byte boundary.
   * NOTE(review): the earlier wording here referred to narrow Thumb code and
   * 4-byte alignment, which looks like a leftover from the Cortex-M template.
   */
  __etext = ALIGN(16);

  /* Initialized data plus the preinit/init/fini function-pointer arrays. */
  .data : AT (__etext)
  {
    __data_start__ = .;
    *(vtable)
    *(.data)
    *(.data.*)

    . = ALIGN(16);
    /* preinit data */
    PROVIDE_HIDDEN (__preinit_array_start = .);
    KEEP(*(.preinit_array))
    PROVIDE_HIDDEN (__preinit_array_end = .);

    . = ALIGN(16);
    /* init data */
    PROVIDE_HIDDEN (__init_array_start = .);
    KEEP(*(SORT(.init_array.*)))
    KEEP(*(.init_array))
    PROVIDE_HIDDEN (__init_array_end = .);


    . = ALIGN(16);
    /* finit data */
    PROVIDE_HIDDEN (__fini_array_start = .);
    KEEP(*(SORT(.fini_array.*)))
    KEEP(*(.fini_array))
    PROVIDE_HIDDEN (__fini_array_end = .);

    KEEP(*(.jcr*))
    . = ALIGN(16);
    /* All data end */
    __data_end__ = .;

  } > RAM

  /*
   * Secondary data section, optional
   *
   * Remember to add each additional data section
   * to the .copy.table above to ensure proper
   * initialization during startup.
   */
/*
  __etext2 = ALIGN(16);

  .data2 : AT (__etext2)
  {
    . = ALIGN(16);
    __data2_start__ = .;
    *(.data2)
    *(.data2.*)
    . = ALIGN(16);
    __data2_end__ = .;

  } > RAM2
*/

  /* Zero-initialized data, bracketed by __bss_start__ / __bss_end__
   * (both 16-byte aligned). */
  .bss :
  {
    . = ALIGN(16);
    __bss_start__ = .;
    *(.bss)
    *(.bss.*)
    *(COMMON)
    . = ALIGN(16);
    __bss_end__ = .;
  } > RAM AT > RAM

  /*
   * Secondary bss section, optional
   *
   * Remember to add each additional bss section
   * to the .zero.table above to ensure proper
   * initialization during startup.
   */
/*
  .bss2 :
  {
    . = ALIGN(16);
    __bss2_start__ = .;
    *(.bss2)
    *(.bss2.*)
    . = ALIGN(16);
    __bss2_end__ = .;
  } > RAM2 AT > RAM2
*/

  /* Heap: __HEAP_SIZE bytes starting at __end__ / end and ending at
   * __HeapLimit. NOTE(review): `end` is presumably the symbol the C
   * library's heap allocator looks for; confirm against the libc in use. */
  .heap (COPY) :
  {
    . = ALIGN(16);
    __end__ = .;
    PROVIDE(end = .);
    . = . + __HEAP_SIZE;
    . = ALIGN(16);
    __HeapLimit = .;
  } > RAM

  /* Stack: __STACK_SIZE bytes positioned so that the section ends at the
   * top of the RAM region; __StackTop is the address just past it. */
  .stack (ORIGIN(RAM) + LENGTH(RAM) - __STACK_SIZE) (COPY) :
  {
    . = ALIGN(16);
    __StackLimit = .;
    . = . + __STACK_SIZE;
    . = ALIGN(16);
    __StackTop = .;
  } > RAM
  PROVIDE(__stack = __StackTop);

  /* Check if data + heap + stack exceeds RAM limit */
  ASSERT(__StackLimit >= __HeapLimit, "region RAM overflowed with stack")
}

qemu-virt-aarch64.ld:

/* Memory parameters consumed by gcc_arm64_ram.ld, which is included last. */
__RAM_BASE = 0x40000000;      /* start address of the RAM region */
__RAM_SIZE =  0x08000000;     /* 128 MiB, the same amount as "-m 128M" in the QEMU command line */
__STACK_SIZE = 0x00100000;    /* 1 MiB reserved for the stack */
__HEAP_SIZE  =  0x00100000;   /* 1 MiB reserved for the heap */
/* Generic section layout; it reads the four symbols assigned above. */
INCLUDE gcc_arm64_ram.ld

startup.s:

                // Minimal reset code: load the stack pointer, call main(), then idle.
                // Nothing here clears .bss or copies .data.
                .title startup64.s
                .arch armv8-a
                .text
                .section .text.startup,"ax"    
                .globl Reset_Handler
// Program entry point (the linker script names it in ENTRY()).
Reset_Handler:
                ldr x0, =__StackTop     // x0 = __StackTop, a symbol provided by the linker script
                mov sp, x0              // install it as the stack pointer
                bl  main                // call the C entry point
// Reached only if main() returns: wait for events forever.
wait:           wfe
                b wait
               .end

pl011.c:

#include <stdint.h>

/* Address of the UART0 data register used by every output routine below;
 * a value stored here is transmitted as one character. */
static volatile unsigned int * const UART0DR =
    ( volatile unsigned int * ) ( uintptr_t ) 0x09000000u;

/**
 * Replacement for the C library putchar(): writes c to the UART0 data
 * register.
 *
 * @param c  character to transmit
 * @return   c, unchanged
 */
int putchar(int c)
{
    /* Transmit char */
    *UART0DR = ( unsigned int ) c;

    return c;
}

/**
 * Write one character to the UART0 data register.
 *
 * @param c  character to transmit
 */
void putchar_uart0( int c )
{
    /* Transmit char */
    *UART0DR = ( unsigned int ) c;
}

/**
 * Write one character to the UART0 data register
 * (same behaviour as putchar_uart0()).
 *
 * @param c  character to transmit
 */
void putc_uart0( int c )
{
    /* Transmit char */
    *UART0DR = ( unsigned int ) c;
}

/**
 * Send a NUL-terminated string to UART0, one character per register write.
 * No newline translation is performed.
 *
 * @param s  string to transmit; a null pointer is ignored
 */
void print_uart0( const char * s )
{
    if( !s )
    {
        return;
    }

    /* Loop until the terminating NUL character. */
    for( ; *s != '\0'; s++ )
    {
        /* Go through unsigned char so a byte >= 0x80 is never sign-extended. */
        *UART0DR = ( unsigned int ) ( unsigned char ) ( *s ); /* Transmit char */
    }
}

/**
 * Send a NUL-terminated string to UART0, emitting a carriage return after
 * every line feed so that terminals return to column 0.
 *
 * Note that the '\n' itself is transmitted first and the '\r' second.
 *
 * @param s  string to transmit; a null pointer is ignored
 */
void puts_uart0( const char * s )
{
    if( !s )
    {
        return;
    }

    /* Loop until the terminating NUL character. */
    for( ; *s != '\0'; s++ )
    {
        /* Go through unsigned char so a byte >= 0x80 is never sign-extended. */
        *UART0DR = ( unsigned int ) ( unsigned char ) ( *s ); /* Transmit char */
        if( *s == '\n' )
        {
            *UART0DR = ( unsigned int ) ( '\r' );
        }
    }
}

pl011.h:

#pragma once

/* Output helpers for UART0; the definitions live in pl011.c. */

#ifdef __cplusplus
extern "C" {
#endif

/* Single-character output: both functions write c to the UART0 data register. */
void putc_uart0(int c);
void putchar_uart0(int c);

/* String output: print_uart0() sends the string unchanged, puts_uart0()
 * additionally sends '\r' after each '\n'. */
void print_uart0(const char *s);
void puts_uart0(const char *s);

#ifdef __cplusplus
}
#endif

qemu-virt-aarch64.c:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>

#include "pl011.h"

// angel/semihosting interface
#define SYS_WRITE0                       0x04 

/**
 * Issue a semihosting request with HLT #0xF000.
 *
 * The operation number is passed in register x0 (w0) and the parameter in
 * x1; whatever the host leaves in x0 is returned. The operands are bound to
 * those registers explicitly so the call does not depend on how the compiler
 * happens to allocate the function arguments, and the "memory" clobber makes
 * sure buffers referenced by `parameter` are written before the trap.
 *
 * @param operation  semihosting operation number (e.g. SYS_WRITE0)
 * @param parameter  operation argument, usually the address of a buffer or
 *                   parameter block
 * @return           value left in x0 by the host; (uint64_t) -1 when built
 *                   for a non-AArch64 target, where no trap is issued
 */
static uint64_t semihosting_call(uint32_t operation, uint64_t parameter)
{
#if defined(__aarch64__)
    register uint64_t x0 __asm__("x0") = operation;
    register uint64_t x1 __asm__("x1") = parameter;

    __asm__ volatile ("hlt #0xf000"
                      : "+r" (x0)
                      : "r" (x1)
                      : "memory");

    return x0;
#else
    /* No semihosting trap exists here (e.g. host-side builds): report failure. */
    (void) operation;
    (void) parameter;
    return (uint64_t) -1;
#endif
}

/* syscall stubs */

/**
 * close() stub: always fails.
 *
 * @param fd  ignored
 * @return    -1, with errno set to EBADF
 */
int _close(int fd)
{
    (void) fd;

    errno = EBADF;
    return -1;
}

/**
 * isatty() stub: reports every descriptor as a terminal.
 *
 * @param fd  ignored
 * @return    always 1
 */
int _isatty(int fd)
{
    (void) fd;

    return 1;
}

/**
 * fstat() stub: always fails and never writes to *st.
 *
 * @param fd  ignored
 * @param st  ignored
 * @return    -1, with errno set to EBADF
 */
int _fstat(int fd, struct stat *st)
{
    (void) fd;
    (void) st;

    errno = EBADF;
    return -1;
}

/**
 * lseek() stub: seeking is not supported, so the call always fails.
 *
 * @param fd   ignored
 * @param ptr  ignored
 * @param dir  ignored
 * @return     (off_t) -1, with errno set to EBADF
 */
off_t _lseek(int fd, off_t ptr, int dir)
{
    const off_t failure = (off_t) -1;

    (void) fd;
    (void) ptr;
    (void) dir;

    errno = EBADF;
    return failure;
}

/**
 * read() stub: there is no input source, so the call always fails and the
 * buffer is left untouched.
 *
 * @param fd   ignored
 * @param ptr  ignored
 * @param len  ignored
 * @return     -1, with errno set to EBADF
 */
int _read(int fd, void *ptr, size_t len)
{
    (void) fd;
    (void) ptr;
    (void) len;

    errno = EBADF;
    return -1;
}

/**
 * write() stub: forwards every byte of the buffer to putchar_uart0(),
 * regardless of which descriptor was requested.
 *
 * @param fd   ignored
 * @param ptr  bytes to send
 * @param len  number of bytes in ptr
 * @return     len (converted to int)
 */
int _write(int fd, const char *ptr, size_t len)
{
    const char *cursor = ptr;
    size_t remaining = len;

    (void) fd;

    while (remaining-- > 0) {
        putchar_uart0(*cursor++);
    }

    return len;
}

/**
 * C entry point, reached through "bl main" in Reset_Handler.
 *
 * Reads the CurrentEL system register and reports the exception level
 * three ways: directly over UART0, through a semihosting SYS_WRITE0
 * request, and through printf() (which ends up in the _write() stub).
 */
void main(void)
{
   char buffer[BUFSIZ];
   uint64_t regCurrentEL;

   __asm volatile ("mrs %0, CurrentEL" : "=r" (regCurrentEL));

   /* The level is taken from bits [3:2]; converting it to int once keeps
    * the argument type in agreement with the %d conversions below. */
   const int el = (int) ((regCurrentEL >> 2) & 0x3);

   // UART0
   snprintf(buffer, sizeof buffer, "Hello EL%d World!\n", el);
   puts_uart0(buffer);

   // angel/semihosting interface
   snprintf(buffer, sizeof buffer, "Hello semi-hosted EL%d World!\n", el);
   semihosting_call(SYS_WRITE0, (uint64_t) (uintptr_t)  buffer);

   // newlib -  custom syscalls.c, with _write() using UART0
   printf("Hello EL%d World! (syscalls version)\n", el);
}

请注意,负责初始化 .bss 部分的代码已被省略。

正在编译:

/opt/arm/9/gcc-arm-9.2-2019.12-x86_64-aarch64-none-elf/bin/aarch64-none-elf-gcc -I. -O0 -ggdb -mtune=cortex-a53 -nostartfiles -ffreestanding --specs=nosys.specs -L. -Wl,-T,qemu-virt-aarch64.ld -o virt.elf startup.s  pl011.c qemu-virt-aarch64.c 

运行:

/opt/qemu-5.2.0/bin/qemu-system-aarch64 -semihosting -m 128M -nographic  -monitor none -serial stdio  -machine virt,gic-version=2,secure=on,virtualization=on -cpu cortex-a53 -kernel virt.elf
Hello EL3 World!
Hello semi-hosted EL3 World!
Hello EL3 World! (syscalls version)