如何使用 IORING_OP_READ_FIXED?

How do I use IORING_OP_READ_FIXED?

在以下页面 https://lwn.net/Articles/810414/ 中写道:

IORING_OP_READ_FIXED IORING_OP_WRITE_FIXED These opcodes also submit I/O operations, but they use "registered" buffers that are already mapped into the kernel, reducing the amount of total overhead.

但是我在网上找不到任何关于如何使用它的示例。io_uring_enter 的手册页中写道:

EFAULT IORING_OP_READ_FIXED or IORING_OP_WRITE_FIXED was specified in the opcode field of the submission queue entry, but either buffers were not registered for this io_uring instance, or the address range described by addr and len does not fit within the buffer registered at buf_index.

在我看来,我应该选定一个内存地址,并从中划分出若干块来使用,但即使使用 0x555555500000 这样的地址并将 len 设为 4096,仍然会得到同样的错误。

IORING_OP_READ_FIXED 是如何工作的?下面是 IORING_OP_READ 的一个可运行示例:
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/mman.h>
#include <linux/io_uring.h>

/*
 * Ordering fences for the ring memory shared with the kernel.
 *
 * The empty asm statement with a "memory" clobber stops the compiler from
 * reordering or caching memory accesses across the macro.  The
 * __atomic_thread_fence() call additionally orders the accesses at the CPU
 * level (acquire for reads of kernel-published data, release for data being
 * published to the kernel), which a compiler-only barrier does not provide
 * on weakly ordered architectures.
 */
#define read_barrier() \
    do { \
        __atomic_thread_fence(__ATOMIC_ACQUIRE); \
        __asm__ __volatile__("":::"memory"); \
    } while (0)
#define write_barrier() \
    do { \
        __asm__ __volatile__("":::"memory"); \
        __atomic_thread_fence(__ATOMIC_RELEASE); \
    } while (0)

/*
 * Minimal raw-syscall io_uring example: submits one IORING_OP_READ request
 * for the file named by argv[1] and prints the data with puts().
 *
 * Returns 0 on success and -1 when a setup step, the submission, or the
 * read itself fails.  Early error returns rely on process exit to release
 * the descriptors and mappings acquired so far.
 */
int main(int argc, char *argv[])
{
    if (argc < 2) {
        fprintf(stderr, "usage: %s <file>\n", argc > 0 ? argv[0] : "read_example");
        return -1;
    }

    struct io_uring_params uring;
    memset(&uring, 0, sizeof(uring));
    const unsigned queue_size = 5;
    int ring_fd = (int)syscall(__NR_io_uring_setup, queue_size, &uring);
    if (ring_fd < 0) {
        perror("io_uring_setup");
        return -1;
    }

    // The CQ fields below are reached through the SQ ring mapping, which is
    // only valid when the kernel exposes both rings through a single mmap.
    if (!(uring.features & IORING_FEAT_SINGLE_MMAP)) {
        fprintf(stderr, "kernel does not support IORING_FEAT_SINGLE_MMAP\n");
        return -1;
    }

    // Map enough to cover whichever ring ends last in the shared region.
    size_t sq_ring_size = uring.sq_off.array + uring.sq_entries * sizeof(unsigned);
    size_t cq_ring_size = uring.cq_off.cqes + uring.cq_entries * sizeof(struct io_uring_cqe);
    size_t ring_size = sq_ring_size > cq_ring_size ? sq_ring_size : cq_ring_size;
    size_t sqes_size = uring.sq_entries * sizeof(struct io_uring_sqe);

    void *ring_map = mmap(0, ring_size, PROT_READ | PROT_WRITE,
                          MAP_SHARED | MAP_POPULATE, ring_fd, IORING_OFF_SQ_RING);
    if (ring_map == MAP_FAILED) {
        perror("mmap(ring)");
        return -1;
    }
    void *sqes_map = mmap(0, sqes_size, PROT_READ | PROT_WRITE,
                          MAP_SHARED | MAP_POPULATE, ring_fd, IORING_OFF_SQES);
    if (sqes_map == MAP_FAILED) {
        perror("mmap(sqes)");
        return -1;
    }

    char *uring_ptr = (char *)ring_map;
    struct io_uring_sqe *submit_entries = (struct io_uring_sqe *)sqes_map;

    unsigned &sqTail = *(unsigned *)(uring_ptr + uring.sq_off.tail);
    unsigned &sqMask = *(unsigned *)(uring_ptr + uring.sq_off.ring_mask);
    unsigned *sqArray = (unsigned *)(uring_ptr + uring.sq_off.array);

    unsigned &cqHead = *(unsigned *)(uring_ptr + uring.cq_off.head);
    unsigned &cqTail = *(unsigned *)(uring_ptr + uring.cq_off.tail);
    unsigned &cqMask = *(unsigned *)(uring_ptr + uring.cq_off.ring_mask);
    struct io_uring_cqe *cqes = (struct io_uring_cqe *)(uring_ptr + uring.cq_off.cqes);

    int fd[2];
    fd[0] = open(argv[1], O_RDONLY);
    if (fd[0] < 0) {
        perror("open");
        return -1;
    }

    struct stat st;
    if (fstat(fd[0], &st) < 0) {
        perror("fstat");
        return -1;
    }

    // Round the file size up to a multiple of 64 bytes.
    size_t size_aligned = ((size_t)st.st_size + 63) & ~(size_t)63;
    if (size_aligned != (unsigned)size_aligned) {
        // The length is stored into sqe.len as an unsigned int below.
        fprintf(stderr, "file too large for a single read request\n");
        return -1;
    }

    // Zero-filled, with one spare byte, so the data handed to puts() is
    // always NUL-terminated even when the read fills its whole slot.
    unsigned char *fileBuf = (unsigned char *)calloc(size_aligned * 2 + 1, 1);
    if (fileBuf == NULL) {
        perror("calloc");
        return -1;
    }

    for (int i = 0; i < 1; i++)
    {
        unsigned slot = sqTail & sqMask;
        struct io_uring_sqe &sqe = submit_entries[slot];
        memset(&sqe, 0, sizeof(sqe));   // no stale fields from an earlier use
        sqe.fd = fd[i];
        sqe.flags = 0;
        sqe.opcode = IORING_OP_READ;
        sqe.addr = (unsigned long long)fileBuf + i * size_aligned;
        sqe.len = (unsigned)size_aligned;
        sqe.off = 0;                    // read from the start of the file
        sqe.user_data = sqe.addr;       // lets the completion find its buffer
        sqArray[slot] = slot;
        write_barrier();                // entry is complete before the tail moves
        sqTail++;
    }
    write_barrier();

    // Submit one request and wait for one completion; no signal mask is passed.
    int ret = (int)syscall(__NR_io_uring_enter, ring_fd, 1, 1,
                           IORING_ENTER_GETEVENTS, (void *)0, (size_t)0);
    if (ret < 0) {
        perror("io_uring_enter");
        return -1;
    }

    int status = 0;
    read_barrier();
    while (cqHead != cqTail)
    {
        struct io_uring_cqe &cqe = cqes[cqHead & cqMask];
        if (cqe.res < 0) {
            // A negative res is reported here as a negated errno value.
            fprintf(stderr, "read failed: %s\n", strerror(-cqe.res));
            status = -1;
        } else {
            puts((const char *)cqe.user_data);
        }
        cqHead++;
    }
    write_barrier();

    free(fileBuf);
    close(fd[0]);
    munmap(sqes_map, sqes_size);
    munmap(ring_map, ring_size);
    close(ring_fd);
    return status;
}

您需要使用 __NR_io_uring_register 系统调用来注册(和取消注册)您的缓冲区。以您的示例为例,可以用一个 iovec 来描述该缓冲区,并将这个 iovec 传给该系统调用:

struct iovec iov = { .iov_base = (void *)fileBuf, .iov_len = (size_aligned*2) };

int rc = syscall(__NR_io_uring_register, ring_fd, IORING_REGISTER_BUFFERS, 
                 (void *)&iov, 1 /* number of iovs */);

要使用已注册的缓冲区,您需要在 sqe 中提供该缓冲区在已注册 iovec 数组中的索引。在此示例中,该值始终为 0,因为只注册了一个 iovec。在您的代码中,您需要设置 sqe.opcode = IORING_OP_READ_FIXED 和 sqe.buf_index = 0。另外,sqe.addr 与 sqe.len 所描述的地址范围必须完全落在 buf_index 对应的已注册缓冲区之内,否则仍会返回 EFAULT。

您可能还想考虑使用 liburing.h 中的初始化辅助函数:
/*
 * Prepare a submission queue entry for a read into a registered buffer.
 *
 * sqe       - entry to fill in
 * fd        - file descriptor to read from
 * buf       - destination address, passed through to io_uring_prep_rw()
 * nbytes    - number of bytes to read
 * offset    - file offset, passed through to io_uring_prep_rw()
 * buf_index - value stored in sqe->buf_index
 */
static inline void io_uring_prep_read_fixed(struct io_uring_sqe *sqe, int fd,
                                            void *buf, unsigned nbytes,
                                            off_t offset, int buf_index)
{
        /* Common read/write setup, with the fixed-buffer read opcode. */
        io_uring_prep_rw(IORING_OP_READ_FIXED, sqe, fd, buf, nbytes, offset);

        /*
         * Assigned after io_uring_prep_rw(), presumably because that helper
         * initialises the whole entry -- confirm against liburing.
         */
        sqe->buf_index = buf_index;
}

我建议查看 liburing - 它恰当地处理了许多繁琐的初始化细节,并为缓冲区 [un] 注册提供包装器。