通过 UFFDIO_API ioctl 检查时,Userfaultfd 写保护似乎不受支持

Userfaultfd write protection appears unsupported when checking through the UFFDIO_API ioctl

我正在尝试使用 Linux userfaultfd 的写保护功能,但它似乎没有在我的内核中启用,尽管我使用的是 5.13 版本(写保护应该在 5.10+ 中受支持)。

当我运行

#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <linux/userfaultfd.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Report a failed call and abort: prints msg followed by the text for the
   current errno value (via perror), then exits with EXIT_FAILURE.
   The do { } while (0) wrapper makes the macro expand to one statement. */
#define errExit(msg)        \
    do {                    \
        perror(msg);        \
        exit(EXIT_FAILURE); \
    } while (0)

/* Return 1 when every bit set in `bit` is also set in `val`, 0 otherwise.
   An all-zero `bit` mask is trivially satisfied and yields 1. */
static int has_bit(uint64_t val, uint64_t bit) {
    const uint64_t missing = bit & ~val; /* mask bits that val lacks */

    return missing == 0;
}

/* Open a userfaultfd, perform the UFFDIO_API handshake while requesting
   UFFD_FEATURE_PAGEFAULT_FLAG_WP, and print which ioctls and features the
   kernel reported back in struct uffdio_api. */
int main(void) {
    long uffd;     /* userfaultfd file descriptor */
    struct uffdio_api uffdio_api;

    uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
    if (uffd == -1)
        errExit("userfaultfd");

    uffdio_api.api = UFFD_API;
    uffdio_api.features = UFFD_FEATURE_PAGEFAULT_FLAG_WP;
    if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1)
        errExit("ioctl-UFFDIO_API");

    /* The ioctls field is a 64-bit mask indexed by the _UFFDIO_* numbers.
       Build each mask in uint64_t: shifting a plain `1UL` is undefined
       when unsigned long is narrower than the shift count. */
    const uint64_t one = 1;

    printf("UFFDIO_API: %d\n", has_bit(uffdio_api.ioctls, one << _UFFDIO_API));
    printf("UFFDIO_REGISTER: %d\n", has_bit(uffdio_api.ioctls, one << _UFFDIO_REGISTER));
    printf("UFFDIO_UNREGISTER: %d\n", has_bit(uffdio_api.ioctls, one << _UFFDIO_UNREGISTER));
    /* NOTE(review): range ioctls such as UFFDIO_WRITEPROTECT appear to be
       reported per registered range in uffdio_register.ioctls, so a 0 here
       presumably does not mean write protection is unavailable -- confirm
       against ioctl_userfaultfd(2). */
    printf("UFFDIO_WRITEPROTECT: %d\n", has_bit(uffdio_api.ioctls, one << _UFFDIO_WRITEPROTECT));
    printf("UFFD_FEATURE_PAGEFAULT_FLAG_WP: %d\n", has_bit(uffdio_api.features, UFFD_FEATURE_PAGEFAULT_FLAG_WP));
}

输出为

UFFDIO_API: 1
UFFDIO_REGISTER: 1
UFFDIO_UNREGISTER: 1
UFFDIO_WRITEPROTECT: 0
UFFD_FEATURE_PAGEFAULT_FLAG_WP: 1

UFFD_FEATURE_PAGEFAULT_FLAG_WP 功能已启用,但 UFFDIO_WRITEPROTECT ioctl 被标记为不受支持,这是启用写保护所必需的。

什么可能导致此功能被禁用,我该如何启用它?

我正在使用 Ubuntu MATE 21.10 和 Linux 内核版本 5.13.0-30-generic

编辑:

根据手册页中关于 UFFDIO_API ioctl 的部分 (https://man7.org/linux/man-pages/man2/ioctl_userfaultfd.2.html),这似乎是启用了写保护的系统的预期行为。但是,当我运行一个完整的程序(生成一个轮询线程,并写入受保护的内存)时,轮询线程没有收到任何通知。

#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <linux/userfaultfd.h>
#include <poll.h>
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>

/* Report a failed call and abort: prints msg followed by the text for the
   current errno value (via perror), then exits with EXIT_FAILURE.
   The do { } while (0) wrapper makes the macro expand to one statement. */
#define errExit(msg)        \
    do {                    \
        perror(msg);        \
        exit(EXIT_FAILURE); \
    } while (0)

/* System page size in bytes; assigned in main() from sysconf(_SC_PAGE_SIZE). */
static int page_size;

/* Thread entry point: block in poll() on the userfaultfd whose descriptor
   is carried in arg (an integer stored in a void pointer), print what
   poll() reported, and terminate the whole process on the first wake-up. */
static void* fault_handler_thread(void* arg) {
    const long fd = (long) arg; /* userfaultfd file descriptor */

    while (1) {
        struct pollfd pfd = { .fd = (int) fd, .events = POLLIN };

        /* Wait with no timeout until the descriptor has something to report. */
        const int ready_count = poll(&pfd, 1, -1);
        if (ready_count == -1)
            errExit("poll");

        const int got_pollin = (pfd.revents & POLLIN) != 0;
        const int got_pollerr = (pfd.revents & POLLERR) != 0;

        printf("\nfault_handler_thread():\n");
        printf(
            "    poll() returns: nready = %d; "
            "POLLIN = %d; POLLERR = %d\n",
            ready_count, got_pollin, got_pollerr);

        /* Any wake-up is treated as a received fault: stop the program. */
        exit(EXIT_FAILURE);
    }
}

/* Demo driver: create a userfaultfd, register one anonymous page in
   write-protect mode, write-protect it, start the polling thread, and then
   write into the page from the main thread. */
int main(void) {
    long uffd;     /* userfaultfd file descriptor */
    char* addr;    /* Start of region handled by userfaultfd */
    uint64_t len;  /* Length of region handled by userfaultfd */
    pthread_t thr; /* ID of thread that handles page faults */
    struct uffdio_api uffdio_api;
    struct uffdio_register uffdio_register;
    struct uffdio_writeprotect uffdio_wp;
    int s;

    page_size = sysconf(_SC_PAGE_SIZE);
    len = page_size;

    /* Create and enable userfaultfd object. */

    uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
    if (uffd == -1)
        errExit("userfaultfd");

    uffdio_api.api = UFFD_API;
    uffdio_api.features = UFFD_FEATURE_PAGEFAULT_FLAG_WP;
    if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1)
        errExit("ioctl-UFFDIO_API");

    addr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (addr == MAP_FAILED)
        errExit("mmap");

    /* %p requires a void pointer argument. */
    printf("Address returned by mmap() = %p\n", (void*) addr);

    /* Register the memory range of the mapping we just created for
       handling by the userfaultfd object. */

    uffdio_register.range.start = (unsigned long) addr;
    uffdio_register.range.len = len;
    uffdio_register.mode = UFFDIO_REGISTER_MODE_WP;
    if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1)
        errExit("ioctl-UFFDIO_REGISTER");

    /* _UFFDIO_WRITEPROTECT is a bit position inside the ioctls mask, not a
       mask itself, so it has to be shifted before testing. */
    const uint64_t wp_bit = (uint64_t) 1 << _UFFDIO_WRITEPROTECT;

    printf("uffdio_register.ioctls = 0x%llx\n", (unsigned long long) uffdio_register.ioctls);
    printf("Have _UFFDIO_WRITEPROTECT? %s\n", (uffdio_register.ioctls & wp_bit) ? "YES" : "NO");

    /* NOTE(review): nothing writes to the mapping between UFFDIO_REGISTER
       and this call, so the page has never been touched when it is
       write-protected. Reportedly no write-protect notification is then
       delivered for it -- confirm on the target kernel. */
    uffdio_wp.range.start = (unsigned long) addr;
    uffdio_wp.range.len = len;
    uffdio_wp.mode = UFFDIO_WRITEPROTECT_MODE_WP;
    if (ioctl(uffd, UFFDIO_WRITEPROTECT, &uffdio_wp) == -1)
        errExit("ioctl-UFFDIO_WRITEPROTECT");

    /* Create a thread that will process the userfaultfd events. */

    s = pthread_create(&thr, NULL, fault_handler_thread, (void*) uffd);
    if (s != 0) {
        errno = s; /* pthread functions return the error instead of setting errno */
        errExit("pthread_create");
    }

    /* Main thread now writes to the mapping at locations 1024 bytes
       apart, which is intended to trigger userfaultfd events for the
       write-protected region. */

    usleep(100000); /* Give the handler thread time to reach poll() */

    size_t l;
    l = 0xf; /* Ensure that faulting address is not on a page
                boundary, in order to test that we correctly
                handle that case in fault_handler_thread(). */
    char i = 0;
    while (l < len) {
        printf("Write address %p in main(): ", (void*) (addr + l));
        addr[l] = i++;
        printf("%d\n", addr[l]);
        l += 1024;
        usleep(100000); /* Slow things down a little */
    }

    exit(EXIT_SUCCESS);
}

UFFDIO_API ioctl 似乎从不报告 _UFFDIO_WRITEPROTECT,这一点可以在内核源代码 (1, 2) 中看到。我猜这是因为是否支持此操作取决于底层映射的类型。

该功能实际上是按每个已注册的范围分别报告的。您必须先通过 ioctl(uffd, UFFDIO_API, ...) 设置 API,然后使用 ioctl(uffd, UFFDIO_REGISTER, ...) 注册一个范围,最后检查 uffdio_register.ioctls 字段。

#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <linux/userfaultfd.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <sys/mman.h>
#include <unistd.h>

/* Report a failed call and abort: prints msg followed by the text for the
   current errno value (via perror), then exits with EXIT_FAILURE.
   The do { } while (0) wrapper makes the macro expand to one statement. */
#define errExit(msg)        \
    do {                    \
        perror(msg);        \
        exit(EXIT_FAILURE); \
    } while (0)

/* Minimal check that UFFDIO_WRITEPROTECT is usable on an anonymous mapping:
   open a userfaultfd, do the UFFDIO_API handshake, register a 0x4000-byte
   region in write-protect mode, print the per-range ioctls mask returned by
   UFFDIO_REGISTER, and finally write-protect the region.
   Returns EXIT_SUCCESS when every step succeeds. */
int main(void) {
    long uffd;

    uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
    if (uffd == -1)
        errExit("userfaultfd");

    /* Request the write-protect feature explicitly during the handshake. */
    struct uffdio_api uffdio_api = {
        .api = UFFD_API,
        .features = UFFD_FEATURE_PAGEFAULT_FLAG_WP
    };

    if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1)
        errExit("ioctl(UFFDIO_API)");

    /* mmap() already returns page-aligned memory, so no additional aligned
       allocation is needed for the region. */
    const size_t region_sz = 0x4000;
    void *region = mmap(NULL, region_sz, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
    if (region == MAP_FAILED)
        errExit("mmap");

    /* Arithmetic on void * is a compiler extension; compute the end address
       through char * instead. */
    printf("Region mapped at %p - %p\n", region, (void *)((char *)region + region_sz));

    struct uffdio_register uffdio_register = {
        .range = { .start = (unsigned long)region, .len = region_sz },
        .mode = UFFDIO_REGISTER_MODE_WP
    };

    if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1)
        errExit("ioctl(UFFDIO_REGISTER)");

    /* _UFFDIO_WRITEPROTECT is a bit position inside the ioctls mask, not a
       mask itself, so it has to be shifted before testing. */
    const uint64_t wp_bit = (uint64_t)1 << _UFFDIO_WRITEPROTECT;
    const int have_wp = (uffdio_register.ioctls & wp_bit) != 0;

    printf("uffdio_register.ioctls = 0x%llx\n", (unsigned long long)uffdio_register.ioctls);
    printf("Have _UFFDIO_WRITEPROTECT? %s\n", have_wp ? "YES" : "NO");

    /* Only UFFDIO_WRITEPROTECT is used below, so it is the only range ioctl
       this program requires. errno is not meaningful here, hence no perror. */
    if (!have_wp) {
        fputs("bad ioctl set\n", stderr);
        return EXIT_FAILURE;
    }

    struct uffdio_writeprotect wp = {
        .range = { .start = (unsigned long)region, .len = region_sz },
        .mode = UFFDIO_WRITEPROTECT_MODE_WP
    };

    if (ioctl(uffd, UFFDIO_WRITEPROTECT, &wp) == -1)
        errExit("ioctl(UFFDIO_WRITEPROTECT)");

    puts("ioctl(UFFDIO_WRITEPROTECT) successful.");
    return EXIT_SUCCESS;
}

输出:

Region mapped at 0x7f45c48fe000 - 0x7f45c4902000
uffdio_register.ioctls = 0x5c
Have _UFFDIO_WRITEPROTECT? YES
ioctl(UFFDIO_WRITEPROTECT) successful.

我找到了解决方案。需要写保护的页面必须在注册之后、被标记为写保护之前先被访问(触摸)一次。据我所知,这是一个未记录的要求。

换句话说,添加

/* Write to the first byte of every page in [addr, addr + len) so that each
   page has been touched before it is write-protected. */
for (size_t i = 0; i < len; i += page_size) {
    addr[i] = 0;
}

在注册和设置写保护这两步之间。

如果我将完整示例更改为如下内容,它就能正常工作:

#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <linux/userfaultfd.h>
#include <poll.h>
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>

/* Report a failed call and abort: prints msg followed by the text for the
   current errno value (via perror), then exits with EXIT_FAILURE.
   The do { } while (0) wrapper makes the macro expand to one statement. */
#define errExit(msg)        \
    do {                    \
        perror(msg);        \
        exit(EXIT_FAILURE); \
    } while (0)

/* System page size in bytes; assigned in main() from sysconf(_SC_PAGE_SIZE). */
static int page_size;

/* Thread entry point: block in poll() on the userfaultfd whose descriptor
   is carried in arg (an integer stored in a void pointer), print what
   poll() reported, and terminate the whole process on the first wake-up. */
static void* fault_handler_thread(void* arg) {
    const long fd = (long) arg; /* userfaultfd file descriptor */

    while (1) {
        struct pollfd pfd = { .fd = (int) fd, .events = POLLIN };

        /* Wait with no timeout until the descriptor has something to report. */
        const int ready_count = poll(&pfd, 1, -1);
        if (ready_count == -1)
            errExit("poll");

        const int got_pollin = (pfd.revents & POLLIN) != 0;
        const int got_pollerr = (pfd.revents & POLLERR) != 0;

        printf("\nfault_handler_thread():\n");
        printf(
            "    poll() returns: nready = %d; "
            "POLLIN = %d; POLLERR = %d\n",
            ready_count, got_pollin, got_pollerr);

        /* Any wake-up is treated as a received fault: stop the program. */
        exit(EXIT_FAILURE);
    }
}

/* Demo driver: create a userfaultfd, register one anonymous page in
   write-protect mode, touch the page, write-protect it, start the polling
   thread, and then write into the page from the main thread. */
int main(void) {
    long uffd;     /* userfaultfd file descriptor */
    char* addr;    /* Start of region handled by userfaultfd */
    uint64_t len;  /* Length of region handled by userfaultfd */
    pthread_t thr; /* ID of thread that handles page faults */
    struct uffdio_api uffdio_api;
    struct uffdio_register uffdio_register;
    struct uffdio_writeprotect uffdio_wp;
    int s;

    page_size = sysconf(_SC_PAGE_SIZE);
    len = page_size;

    /* Create and enable userfaultfd object. */

    uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
    if (uffd == -1)
        errExit("userfaultfd");

    uffdio_api.api = UFFD_API;
    uffdio_api.features = UFFD_FEATURE_PAGEFAULT_FLAG_WP;
    if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1)
        errExit("ioctl-UFFDIO_API");

    addr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (addr == MAP_FAILED)
        errExit("mmap");

    /* %p requires a void pointer argument. */
    printf("Address returned by mmap() = %p\n", (void*) addr);

    /* Register the memory range of the mapping we just created for
       handling by the userfaultfd object. */

    uffdio_register.range.start = (unsigned long) addr;
    uffdio_register.range.len = len;
    uffdio_register.mode = UFFDIO_REGISTER_MODE_WP;
    if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1)
        errExit("ioctl-UFFDIO_REGISTER");

    /* _UFFDIO_WRITEPROTECT is a bit position inside the ioctls mask, not a
       mask itself, so it has to be shifted before testing. */
    const uint64_t wp_bit = (uint64_t) 1 << _UFFDIO_WRITEPROTECT;

    printf("uffdio_register.ioctls = 0x%llx\n", (unsigned long long) uffdio_register.ioctls);
    printf("Have _UFFDIO_WRITEPROTECT? %s\n", (uffdio_register.ioctls & wp_bit) ? "YES" : "NO");

    /* Touch every page after registration and before write-protecting it.
       NOTE(review): without this step no write-protect notification was
       observed; this appears to be an undocumented requirement -- confirm
       on the target kernel. */
    for (size_t i = 0; i < len; i += page_size) {
        addr[i] = 0;
    }

    uffdio_wp.range.start = (unsigned long) addr;
    uffdio_wp.range.len = len;
    uffdio_wp.mode = UFFDIO_WRITEPROTECT_MODE_WP;
    if (ioctl(uffd, UFFDIO_WRITEPROTECT, &uffdio_wp) == -1)
        errExit("ioctl-UFFDIO_WRITEPROTECT");

    /* Create a thread that will process the userfaultfd events. */

    s = pthread_create(&thr, NULL, fault_handler_thread, (void*) uffd);
    if (s != 0) {
        errno = s; /* pthread functions return the error instead of setting errno */
        errExit("pthread_create");
    }

    /* Main thread now writes to the mapping at locations 1024 bytes
       apart, which is intended to trigger userfaultfd events for the
       write-protected region. */

    usleep(100000); /* Give the handler thread time to reach poll() */

    size_t l;
    l = 0xf; /* Ensure that faulting address is not on a page
                boundary, in order to test that we correctly
                handle that case in fault_handler_thread(). */
    char i = 0;
    while (l < len) {
        printf("Write address %p in main(): ", (void*) (addr + l));
        addr[l] = i++;
        printf("%d\n", addr[l]);
        l += 1024;
        usleep(100000); /* Slow things down a little */
    }

    exit(EXIT_SUCCESS);
}