内核驱动程序的“释放”文件操作处理程序是否等待其他 fop 完成?

Does a kernel driver's `release` file-operations handler wait for other fops to finish?

在 linux 内核设备驱动程序的情况下,存在 file_operations 结构或 fops 结构,它允许驱动程序为各种文件操作定义处理程序。

我的问题是关于 .release fop 处理程序。

我知道 release 处理程序只有在 file 对象的最后一个文件描述符 (fd) 被关闭(或解除映射)时才会被调用。这发生在对 file 调用 fput 并且 file->f_count 降到 0 的时候。

但是——我不清楚在进入 release 时,其他文件操作是否可能正在另一个线程中同时运行。

例如:

进程的一个线程是否可能正处于 file(或 fd)的 ioctl 处理程序内,而同一进程的另一个线程同时处于 release 处理程序内?

release 能否成为 file 对象竞争条件的一个因素?

could 1 thread of a process be inside the ioctl handler for the file (or fd), while another thread of the same process is inside of the release handler?

不会。release 入口点只有在文件条目的引用计数器降为 0 时才会被调用。ioctl() 会增加文件的引用计数器。因此,当 ioctl() 正在执行时,不会调用 release 入口点。

前言

下面讨论的源代码是:

  • GLIBC 2.31
  • Linux 5.4

GLIBC 的 pthread 管理

GLIBC 的 pthread_create() 实际上会调用 clone() 系统调用,并使用以下标志:

CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID

根据 clone() 的手册(manual),CLONE_FILES 标志使得一个进程的各个线程共享同一个文件描述符表(share the same file descriptor table)。由一个线程创建的任何文件描述符在其他线程中也有效。类似地,如果一个线程关闭文件描述符,或更改其关联标志(使用 fcntl() 的 F_SETFD 操作),则其他线程也会受到影响。

内核端的clone()

当 clone() 带有 CLONE_FILES 标志时,files_struct 不会被复制,而是增加其引用计数器。因此,两个线程的任务结构都指向同一个 files_struct(files 字段):

任务结构定义在 include/linux/sched.h 中:

/* Excerpt of struct task_struct; "[...]" marks fields left out of the quote. */
struct task_struct {
[...]
    /* Open file information: */
    struct files_struct     *files; /// <==== Table of open files shared between threads
[...]

在 kernel/fork.c 中,clone() 服务调用 copy_files() 来增加 files_struct 上的引用计数器:
/*
 * Set up the open-file table of a task being created by clone().
 *
 * clone_flags: flags passed to clone(); only CLONE_FILES is tested here.
 * tsk:         the new task; its ->files is assigned only on the copy path.
 *
 * Returns 0 when the current task has no table, when the table is shared,
 * or when the copy succeeded; otherwise the value left in 'error' after
 * dup_fd() failed.
 */
static int copy_files(unsigned long clone_flags, struct task_struct *tsk)
{
    struct files_struct *oldf, *newf;
    int error = 0;

    /*
     * A background process may not have any files ...
     */
    oldf = current->files;
    if (!oldf)
        goto out;

    /* Sharing requested: take one more reference, do not copy anything. */
    if (clone_flags & CLONE_FILES) {
      atomic_inc(&oldf->count);  // <==== Ref counter incremented: files_struct is shared
        goto out;
    }

    /*
     * No CLONE_FILES: the new task gets its own table.
     * NOTE(review): dup_fd() presumably duplicates oldf and reports the
     * failure reason through 'error' -- confirm against its definition.
     */
    newf = dup_fd(oldf, &error);
    if (!newf)
        goto out;

    tsk->files = newf;
    error = 0;
out:
    return error;
}

files_struct 定义在 include/linux/fdtable.h:

/*
 * Open file table structure
 *
 * (Excerpt: the quote stops at fd_array; the end of the definition is not
 * shown here.)
 */
struct files_struct {
  /*
   * read mostly part
   */
        atomic_t count;  // <==== Reference counter (incremented by copy_files() when CLONE_FILES is set)
    bool resize_in_progress;
    wait_queue_head_t resize_wait;

    struct fdtable __rcu *fdt;
    struct fdtable fdtab;
  /*
   * written part on a separate cache line in SMP
   */
    spinlock_t file_lock ____cacheline_aligned_in_smp;
    unsigned int next_fd;
    unsigned long close_on_exec_init[1];
    unsigned long open_fds_init[1];
    unsigned long full_fds_bits_init[1];
    /* NOTE(review): presumably the built-in array of open file pointers -- confirm. */
    struct file __rcu * fd_array[NR_OPEN_DEFAULT];

ioctl() 操作

ioctl() 系统调用定义在 fs/ioctl.c 中。它首先调用 fdget() 来增加文件条目上的引用计数器,执行请求的操作,然后调用 fdput():

/*
 * Kernel-side implementation of ioctl(2).
 *
 * fd:  file descriptor the request is addressed to.
 * cmd: request code, forwarded unchanged.
 * arg: request argument, forwarded unchanged.
 *
 * Returns -EBADF when fd does not resolve to a file, otherwise the result
 * of the security hook or, if that hook returned 0, of do_vfs_ioctl().
 */
int ksys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
{
    int error;
    /* Resolve fd to its file entry; released again by fdput() below. */
    struct fd f = fdget(fd);

    if (!f.file)
        return -EBADF;
    error = security_file_ioctl(f.file, cmd, arg);
    if (!error)
        error = do_vfs_ioctl(f.file, fd, cmd, arg);
    /* Drop what fdget() took; runs on both the success and the error path. */
    fdput(f);
    return error;
}

/* ioctl system call entry: forwards its three arguments to ksys_ioctl(). */
SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
{
    return ksys_ioctl(fd, cmd, arg);
}

文件条目在include/linux/fs.h中定义。它的引用计数器是f_count字段:

/* Excerpt of struct file (one open file entry); "[...]" marks elided fields. */
struct file {
    union {
        struct llist_node   fu_llist;
        struct rcu_head     fu_rcuhead;
    } f_u;
    struct path     f_path;
    struct inode        *f_inode;   /* cached value */
    /* Handler table of the file; ->release is one of its members. */
    const struct file_operations    *f_op;

    /*
     * Protects f_ep_links, f_flags.
     * Must not be taken from IRQ context.
     */
    spinlock_t      f_lock;
    enum rw_hint        f_write_hint;
        atomic_long_t       f_count;  // <===== Reference counter
    unsigned int        f_flags;
[...]
} __randomize_layout
  __attribute__((aligned(4)));

例子

这是一个简单的设备驱动程序,其中每个文件操作在被触发时仅显示一条消息。ioctl() 入口点会使调用者休眠 5 秒:

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/kdev_t.h>
#include <linux/cdev.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/delay.h>


MODULE_LICENSE("GPL");

/* Name under which the char device region is registered. */
#define DEVICE_NAME "device"

/* Forward declarations of the handlers referenced by the fops table. */
static int device_open(struct inode *inode, struct file *file);
static int device_flush(struct file *file, fl_owner_t id);
static int device_release(struct inode *inode, struct file *file);
static ssize_t device_read(struct file *filp, char *buff, size_t len, loff_t *off);
static ssize_t device_write(struct file *filp, const char *buff, size_t len, loff_t *off);
static long device_ioctl(struct file *file, unsigned int ioctl_num, unsigned long ioctl_param);

/*
 * Handler table of the example device.  Every handler logs one message;
 * the ioctl handler additionally sleeps before returning.
 */
static const struct file_operations fops = {
    .owner          = THIS_MODULE,
    .open           = device_open,
    .flush          = device_flush,
    .release        = device_release,
    .read           = device_read,
    .write          = device_write,
    .unlocked_ioctl = device_ioctl,
};

/*
 * Module-private state.  Both objects are only used inside this file, so
 * they get internal linkage instead of leaking generic names into the
 * kernel's global symbol namespace.
 */
static struct cdev *device_cdev;  /* char device object set up in init() */
static dev_t deviceNumbers;       /* device number obtained in init() */

/*
 * Module entry point: reserve one dynamically numbered char device and
 * register its cdev with the fops table.
 *
 * Returns 0 on success or the negative error code of the step that failed.
 * Everything acquired before a failure is released again before returning.
 */
static  int __init init(void)
{
  int ret;

  // This returns the major number chosen dynamically in deviceNumbers
  ret = alloc_chrdev_region(&deviceNumbers, 0, 1, DEVICE_NAME);
  if (ret < 0) {
    printk(KERN_ALERT "Error registering: %d\n", ret);
    return ret;
  }

  device_cdev = cdev_alloc();
  if (!device_cdev) {
    printk(KERN_ALERT "Error allocating cdev\n");
    ret = -ENOMEM;
    goto err_region;
  }

  /*
   * A cdev obtained from cdev_alloc() is already initialised.  Calling
   * cdev_init() on it would re-initialise the embedded kobject and lose
   * the "free on last put" behaviour, so only the fields are filled in.
   */
  device_cdev->owner = THIS_MODULE;
  device_cdev->ops = &fops;

  ret = cdev_add(device_cdev, deviceNumbers, 1);
  if (ret < 0) {
    printk(KERN_ALERT "Error adding cdev: %d\n", ret);
    goto err_cdev;
  }

  printk(KERN_INFO "Device initialized (major number is %d)\n", MAJOR(deviceNumbers));

  return 0;

err_cdev:
  // Drops the reference taken by cdev_alloc()
  cdev_del(device_cdev);
err_region:
  unregister_chrdev_region(deviceNumbers, 1);
  return ret;
}

/*
 * Module exit point: undo init() in reverse order.  The cdev is removed
 * first so that the device number range is not given back while a live
 * cdev is still mapped onto it.
 */
static void __exit cleanup(void)
{
  cdev_del(device_cdev);

  unregister_chrdev_region(deviceNumbers, 1);

  printk(KERN_INFO "Device unloaded\n");
}

/* open handler: logs the event and reports success. */
static int device_open(struct inode *inode, struct file *file)
{
  const int status = 0;

  printk(KERN_INFO "Device open\n");

  return status;
}

/* flush handler: logs the event and reports success. */
static int device_flush(struct file *file, fl_owner_t id)
{
  const int status = 0;

  printk(KERN_INFO "Device flush\n");

  return status;
}

/* release handler: logs the event and reports success. */
static int device_release(struct inode *inode, struct file *file)
{
  const int status = 0;

  printk(KERN_INFO "Device released\n");

  return status;
}


/*
 * write handler: logs the event and claims the whole request was consumed.
 * The user buffer is never read.
 */
static ssize_t device_write(struct file *filp, const char *buff, size_t len, loff_t * off)
{
  ssize_t consumed = len;

  printk(KERN_INFO "Device write\n");

  return consumed;
}

/*
 * read handler: logs the event and returns 0 bytes.
 * The user buffer is never written.
 */
static ssize_t device_read(struct file *filp, char *buff, size_t len, loff_t * off)
{
  ssize_t produced = 0;

  printk(KERN_INFO "Device read\n");

  return produced;
}

/*
 * ioctl handler: logs on entry, sleeps, logs on exit.
 * The request number and its argument are ignored; the result is always 0.
 */
static long int device_ioctl(struct file *file, unsigned int ioctl_num, unsigned long ioctl_param)
{
  /* Length of the sleep that keeps the caller inside the handler. */
  const unsigned int nap_ms = 5000;

  printk(KERN_INFO "Device ioctl enter\n");

  msleep_interruptible(nap_ms);

  printk(KERN_INFO "Device ioctl out\n");

  return 0;
}

/* Register init() and cleanup() as the module's load and unload hooks. */
module_init(init);
module_exit(cleanup);

这是一个用户空间程序,包含主线程和辅助线程。主线程打开上面的设备,等待辅助线程启动(通过 barrier 同步),1 秒后关闭设备。与此同时,辅助线程对该设备调用 ioctl(),使其休眠 5 秒;然后它在退出前第二次调用 ioctl()。
预期的行为是:主线程在辅助线程正在运行 ioctl() 时关闭设备文件。

#include <stdio.h>
#include <pthread.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <errno.h>

/* Descriptor of /dev/device: opened and closed by main(), used by entry(). */
static int dev_fd;

/* Two-party barrier used as the rendez-vous between main() and entry(). */
static pthread_barrier_t barrier;


/*
 * Secondary thread body.
 *
 * Waits on the barrier until main() is ready, then issues two ioctl()
 * calls on dev_fd and prints the result and errno of each.
 *
 * errno is cleared before every call: it is only meaningful after a
 * failure, so without the reset a successful call could print a stale
 * value left behind by an earlier library call.
 *
 * arg: unused.
 * Returns NULL.
 */
void *entry(void *arg)
{
  int rc;

  (void)arg;

  printf("Thread running...\n");

  // Rendez-vous with main thread
  pthread_barrier_wait(&barrier);

  // 1st request: issued while the descriptor is still open
  errno = 0;
  rc = ioctl(dev_fd, 0);
  printf("rc = %d, errno = %d\n", rc, errno);

  // 2nd request: issued after the first one has returned
  errno = 0;
  rc = ioctl(dev_fd, 0);
  printf("rc = %d, errno = %d\n", rc, errno);

  return NULL;
}

/*
 * Main thread: open the device, start the secondary thread, meet it at the
 * barrier, wait one second, then close the descriptor while the secondary
 * thread is expected to still be inside its first ioctl().
 *
 * Every setup step is checked.  In particular a failed pthread_create()
 * must not reach the barrier, because nobody would ever release it.
 *
 * Returns 0 on success, 1 when a setup step failed.
 */
int main(void)
{
  pthread_t tid;
  int rc;

  dev_fd = open("/dev/device", O_RDWR);
  if (dev_fd < 0) {
    perror("open(/dev/device)");
    return 1;
  }

  // pthread functions return the error number instead of setting errno
  rc = pthread_barrier_init(&barrier, NULL, 2);
  if (rc != 0) {
    fprintf(stderr, "pthread_barrier_init failed: %d\n", rc);
    close(dev_fd);
    return 1;
  }

  rc = pthread_create(&tid, NULL, entry, NULL);
  if (rc != 0) {
    fprintf(stderr, "pthread_create failed: %d\n", rc);
    pthread_barrier_destroy(&barrier);
    close(dev_fd);
    return 1;
  }

  // Rendez-vous with the secondary thread
  pthread_barrier_wait(&barrier);

  // Give the secondary thread time to enter its first ioctl()
  sleep(1);

  close(dev_fd);

  pthread_join(tid, NULL);

  pthread_barrier_destroy(&barrier);

  return 0;
}

安装内核模块:

$ sudo insmod ./device.ko
$ dmesg
[13270.589766] Device initialized (major number is 237)
$ sudo mknod /dev/device c 237 0
$ sudo chmod 666 /dev/device 
$ ls -l /dev/device 
crw-rw-rw- 1 root root 237, 0 janv.  27 10:55 /dev/device

程序的执行表明,第一个 ioctl() 让线程等待了 5 秒。但第二个调用返回 EBADF (9) 错误,因为此时设备文件已被主线程关闭:

$ gcc p1.c -lpthread
$ ./a.out
Thread running...
rc = 0, errno = 0
rc = -1, errno = 9

在内核日志中,我们可以看到:当第一个 ioctl() 仍在辅助线程中运行时,主线程中的 close() 只在设备上触发了一次 flush() 操作。然后,一旦第一个 ioctl() 返回,内核才在内部释放文件条目(引用计数器降为 0)。第二个 ioctl() 没有到达设备,因为文件描述符已不再引用任何打开的文件,所以第二次调用返回 EBADF 错误:

[13270.589766] Device initialized (major number is 237)
[13656.862951] Device open        <==== Open() in the main thread
[13656.863315] Device ioctl enter <==== 1st ioctl() in secondary thread
[13657.863523] Device flush       <==== 1 s later, flush() = close() in the main thread
[13661.941238] Device ioctl out   <==== 5 s later, the 1st ioctl() returns
[13661.941244] Device released    <==== The file is released because the reference counter reached 0