内核驱动程序的“释放”文件操作处理程序是否等待其他 fop 完成?
Does a kernel driver's `release` file-operations handler wait for other fops to finish?
在 Linux 内核设备驱动程序中,有一个 file_operations 结构(即 fops 结构),驱动程序可以通过它为各种文件操作定义处理程序。
我的问题是关于 .release fop 处理程序的。
我知道 release 处理程序只有在 file 对象的最后一个文件描述符 (fd) 被关闭(或取消映射)时才会被调用。这是在对 file 调用 fput 并且 file->f_count 降到 0 时完成的。
但是,我不清楚在进入 release 时,其他文件操作是否可能正在另一个线程中同时运行。
例如:
进程的一个线程是否可能处于 file(或 fd)的 ioctl 处理程序内,而同一进程的另一个线程正处于 release 处理程序内?
release 是否可能成为 file 对象竞争条件的一个因素?
could 1 thread of a process be inside the ioctl handler for the file (or fd), while another thread of the same process is inside of the release handler?
不会。release 入口点只有在文件条目的引用计数器为 0 时才会被调用。ioctl() 会增加文件的引用计数器。因此,当 ioctl() 正在执行时,不会调用 release 入口点。
前言
下面讨论的源代码是:
- GLIBC 2.31
- Linux 5.4
GLIBC 的 pthread 管理
GLIBC 的 pthread_create() 实际上会调用 clone() 系统调用,并带有以下标志:
CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID
根据 clone() 的手册,CLONE_FILES 标志使各线程共享同一个文件描述符表。由一个线程创建的任何文件描述符在其他线程中也有效。类似地,如果一个线程关闭文件描述符,或更改其关联标志(使用 fcntl() F_SETFD 操作),则其他线程也受到影响。
内核端的clone()
当clone()传CLONE_FILES时,files_struct 没有被复制,但是引用计数器增加了。因此,两个线程的任务结构都指向相同的 files_struct (files 字段):
。任务结构定义在include/linux/sched.h:
/* Excerpt of struct task_struct; "[...]" marks members left out by the author. */
struct task_struct {
[...]
/* Open file information: */
struct files_struct *files; /* <==== Table of open files, shared between threads cloned with CLONE_FILES */
[...]
。在 kernel/fork.c 中,clone() 服务调用 copy_files() 增加 files_struct
上的引用计数器
/*
 * Set up the open-file table of the new task @tsk during clone().
 *
 * With CLONE_FILES in @clone_flags the current task's files_struct is shared:
 * only its reference count is incremented and tsk->files is not touched here.
 * Otherwise dup_fd() builds a separate table that is stored in tsk->files.
 *
 * Returns 0 on success, or the error code left in 'error' by dup_fd().
 */
static int copy_files(unsigned long clone_flags, struct task_struct *tsk)
{
struct files_struct *oldf, *newf;
int error = 0;
/*
* A background process may not have any files ...
*/
oldf = current->files;
if (!oldf)
goto out;
if (clone_flags & CLONE_FILES) {
atomic_inc(&oldf->count); /* <==== Ref counter incremented: files_struct is shared */
goto out;
}
/* No sharing requested: duplicate the table; dup_fd() reports failure through 'error'. */
newf = dup_fd(oldf, &error);
if (!newf)
goto out;
tsk->files = newf;
error = 0;
out:
return error;
}
。 files_struct 定义在 include/linux/fdtable.h:
/*
 * Open file table structure
 *
 * One instance is shared by every task that was cloned with CLONE_FILES;
 * 'count' tracks how many tasks reference it.
 */
struct files_struct {
	/*
	 * read mostly part
	 */
	atomic_t count; /* <==== Reference counter */
	bool resize_in_progress;
	wait_queue_head_t resize_wait;
	struct fdtable __rcu *fdt;
	struct fdtable fdtab;
	/*
	 * written part on a separate cache line in SMP
	 */
	spinlock_t file_lock ____cacheline_aligned_in_smp;
	unsigned int next_fd;
	unsigned long close_on_exec_init[1];
	unsigned long open_fds_init[1];
	unsigned long full_fds_bits_init[1];
	struct file __rcu * fd_array[NR_OPEN_DEFAULT];
};
ioctl() 操作
ioctl() 系统调用定义在 fs/ioctl.c 中。它首先调用 fdget() 获取文件条目的引用(当文件描述符表被多个线程共享时,会增加文件条目的引用计数器),执行请求的操作,然后调用 fdput() 释放该引用:
/*
 * Kernel-side implementation of ioctl(2) for descriptor @fd.
 *
 * Returns -EBADF when @fd does not resolve to an open file. Otherwise returns
 * the error from security_file_ioctl() or, when that hook returns 0, the
 * result of do_vfs_ioctl().
 */
int ksys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
{
int error;
/* Resolve fd to its struct file; the reference obtained here is dropped by fdput() below. */
struct fd f = fdget(fd);
if (!f.file)
return -EBADF;
error = security_file_ioctl(f.file, cmd, arg);
if (!error)
error = do_vfs_ioctl(f.file, fd, cmd, arg);
/* Release the reference only after the ioctl work has finished. */
fdput(f);
return error;
}
/* ioctl system-call entry: forwards its three arguments unchanged to ksys_ioctl(). */
SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
{
return ksys_ioctl(fd, cmd, arg);
}
文件条目在include/linux/fs.h中定义。它的引用计数器是f_count字段:
/* Excerpt of struct file (the "file entry" of the text); "[...]" marks members left out by the author. */
struct file {
union {
struct llist_node fu_llist;
struct rcu_head fu_rcuhead;
} f_u;
struct path f_path;
struct inode *f_inode; /* cached value */
/* Handlers table of the driver/filesystem backing this file. */
const struct file_operations *f_op;
/*
* Protects f_ep_links, f_flags.
* Must not be taken from IRQ context.
*/
spinlock_t f_lock;
enum rw_hint f_write_hint;
atomic_long_t f_count; /* <===== Reference counter */
unsigned int f_flags;
[...]
} __randomize_layout
__attribute__((aligned(4)));
例子
这是一个简单的设备驱动程序,其中的文件操作仅在触发时显示一条消息。 ioctl() 条目使调用者休眠 5 秒:
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/kdev_t.h>
#include <linux/cdev.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/delay.h>
MODULE_LICENSE("GPL");
#define DEVICE_NAME "device"
/* Forward declarations of the handlers referenced by the fops table below. */
static int device_open(struct inode *, struct file *);
static int device_release(struct inode *, struct file *);
static ssize_t device_read(struct file *, char *, size_t, loff_t *);
static ssize_t device_write(struct file *, const char *, size_t, loff_t *);
static long int device_ioctl(struct file *, unsigned int, unsigned long);
static int device_flush(struct file *, fl_owner_t);
/*
 * Dispatch table of the demo device: every hook points at the matching
 * device_*() handler defined in this module.
 */
static const struct file_operations fops = {
	.owner          = THIS_MODULE,
	/* lifetime hooks */
	.open           = device_open,
	.flush          = device_flush,
	.release        = device_release,
	/* data and control hooks */
	.read           = device_read,
	.write          = device_write,
	.unlocked_ioctl = device_ioctl,
};
/* Character device object of the module; set up in init(), removed in cleanup(). */
static struct cdev *device_cdev;
/* Device number (major/minor) filled in by alloc_chrdev_region() in init(). */
static dev_t deviceNumbers;
/*
 * Module load hook: reserve one character-device number, then allocate and
 * register the cdev that routes file operations to fops.
 *
 * Returns 0 on success or a negative errno. On failure every resource
 * acquired so far is released again, so nothing stays registered.
 */
static int __init init(void)
{
	int ret;

	/* This returns the major number chosen dynamically in deviceNumbers */
	ret = alloc_chrdev_region(&deviceNumbers, 0, 1, DEVICE_NAME);
	if (ret < 0) {
		printk(KERN_ALERT "Error registering: %d\n", ret);
		return ret;
	}

	device_cdev = cdev_alloc();
	if (!device_cdev) {
		ret = -ENOMEM;
		printk(KERN_ALERT "Error allocating cdev: %d\n", ret);
		goto err_unregister;
	}

	/*
	 * cdev_alloc() hands back an already initialised object. Running
	 * cdev_init() on it would re-initialise the embedded kobject and lose
	 * the release path that frees the allocation, so only the fields are
	 * filled in here.
	 */
	device_cdev->owner = THIS_MODULE;
	device_cdev->ops = &fops;

	ret = cdev_add(device_cdev, deviceNumbers, 1);
	if (ret < 0) {
		printk(KERN_ALERT "Error adding cdev: %d\n", ret);
		goto err_put;
	}

	printk(KERN_INFO "Device initialized (major number is %d)\n", MAJOR(deviceNumbers));
	return 0;

err_put:
	/* Never went live: dropping the last kobject reference frees the cdev. */
	kobject_put(&device_cdev->kobj);
	device_cdev = NULL;
err_unregister:
	unregister_chrdev_region(deviceNumbers, 1);
	return ret;
}
/*
 * Module unload hook: undo init() in reverse order of acquisition.
 */
static void __exit cleanup(void)
{
	/*
	 * Remove the cdev before giving back its number range, so the device
	 * can no longer be opened through a range that is already released.
	 */
	cdev_del(device_cdev);
	unregister_chrdev_region(deviceNumbers, 1);
	printk(KERN_INFO "Device unloaded\n");
}
/* .open handler: only logs that the device was opened; always returns 0. */
static int device_open(struct inode *inode, struct file *file)
{
printk(KERN_INFO "Device open\n");
return 0;
}
/*
 * .flush handler: only logs the call; always returns 0.
 * In the demo run shown in this document it is logged when close() is called.
 */
static int device_flush(struct file *file, fl_owner_t id)
{
printk(KERN_INFO "Device flush\n");
return 0;
}
/*
 * .release handler: only logs the call; always returns 0.
 * In the demo run shown in this document it is logged after the pending
 * ioctl() has returned.
 */
static int device_release(struct inode *inode, struct file *file)
{
printk(KERN_INFO "Device released\n");
return 0;
}
/*
 * .write handler: logs the call and reports all @len bytes as written
 * without ever reading @buff.
 * NOTE(review): @buff is presumably a user-space pointer and would normally
 * carry the __user annotation - confirm before it is ever dereferenced.
 */
static ssize_t device_write(struct file *filp, const char *buff, size_t len, loff_t * off)
{
printk(KERN_INFO "Device write\n");
return len;
}
/*
 * .read handler: logs the call and returns 0 without writing to @buff.
 * NOTE(review): @buff is presumably a user-space pointer and would normally
 * carry the __user annotation - confirm before it is ever dereferenced.
 */
static ssize_t device_read(struct file *filp, char *buff, size_t len, loff_t * off)
{
printk(KERN_INFO "Device read\n");
return 0;
}
/*
 * .unlocked_ioctl handler: logs entry, sleeps for up to 5000 ms, logs exit
 * and returns 0. @ioctl_num and @ioctl_param are ignored.
 */
static long int device_ioctl(struct file *file, unsigned int ioctl_num, unsigned long ioctl_param)
{
printk(KERN_INFO "Device ioctl enter\n");
/* Return value is ignored, so an early wake-up is not reported to the caller. */
msleep_interruptible(5000);
printk(KERN_INFO "Device ioctl out\n");
return 0;
}
/* Register init() and cleanup() as the module's entry and exit functions. */
module_init(init);
module_exit(cleanup);
这是一个用户空间程序,包含主线程和辅助线程。主线程打开上面的设备,等待辅助线程启动(barrier),1 秒后关闭设备。与此同时,辅助线程在上述设备上调用 ioctl(),使其休眠 5 秒。然后它在退出前第二次调用 ioctl()。
预期的行为是:主线程关闭设备文件时,辅助线程正在运行 ioctl()。
#include <stdio.h>
#include <pthread.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <errno.h>
/* Descriptor of /dev/device: opened and closed by main(), used by entry(). */
static int dev_fd;
/* Two-party barrier on which main() and entry() meet before the test starts. */
static pthread_barrier_t barrier;
/*
 * Secondary thread body.
 *
 * Meets the main thread at the barrier, then issues two ioctl() calls on the
 * shared descriptor dev_fd and prints the result of each one.
 *
 * @arg is unused. Always returns NULL.
 */
void *entry(void *arg)
{
	int rc;

	(void)arg;

	printf("Thread running...\n");
	// Rendez-vous with main thread
	pthread_barrier_wait(&barrier);

	/*
	 * errno is only set on failure, so clear it before each call: the value
	 * printed next to a successful rc is then 0 and not a leftover.
	 */
	errno = 0;
	rc = ioctl(dev_fd, 0);
	printf("rc = %d, errno = %d\n", rc, errno);

	errno = 0;
	rc = ioctl(dev_fd, 0);
	printf("rc = %d, errno = %d\n", rc, errno);

	return NULL;
}
/*
 * Main thread: open the device, start the secondary thread, wait for it at
 * the barrier, close the descriptor one second later, then join the thread.
 *
 * Returns 0 on success and 1 when the device cannot be opened or the
 * barrier/thread cannot be created.
 */
int main(void)
{
	pthread_t tid;
	int rc;

	dev_fd = open("/dev/device", O_RDWR);
	if (dev_fd < 0) {
		perror("open /dev/device");
		return 1;
	}

	rc = pthread_barrier_init(&barrier, NULL, 2);
	if (rc != 0) {
		fprintf(stderr, "pthread_barrier_init failed: %d\n", rc);
		close(dev_fd);
		return 1;
	}

	/* Without this check a failed create would leave main stuck on the barrier. */
	rc = pthread_create(&tid, NULL, entry, NULL);
	if (rc != 0) {
		fprintf(stderr, "pthread_create failed: %d\n", rc);
		pthread_barrier_destroy(&barrier);
		close(dev_fd);
		return 1;
	}

	pthread_barrier_wait(&barrier);
	/* Give the secondary thread time to enter its first ioctl() before closing. */
	sleep(1);
	close(dev_fd);

	pthread_join(tid, NULL);
	pthread_barrier_destroy(&barrier);
	return 0;
}
安装内核模块:
$ sudo insmod ./device.ko
$ dmesg
[13270.589766] Device initialized (major number is 237)
$ sudo mknod /dev/device c 237 0
$ sudo chmod 666 /dev/device
$ ls -l /dev/device
crw-rw-rw- 1 root root 237, 0 janv. 27 10:55 /dev/device
程序的执行表明,第一个 ioctl() 让线程等待了 5 秒。但是第二个 ioctl() 返回 EBADF (9) 错误,因为在此期间设备文件已被主线程关闭:
$ gcc p1.c -lpthread
$ ./a.out
Thread running...
rc = 0, errno = 0
rc = -1, errno = 9
在内核日志中,我们可以看到:当第一个 ioctl() 还在辅助线程中运行时,主线程中的 close() 只在设备上触发了一次 flush() 操作。然后,一旦第一个 ioctl() 返回,内核才在内部释放文件条目(引用计数器降为 0)。第二个 ioctl() 没有到达设备,因为文件描述符已不再引用打开的文件,所以第二次调用返回 EBADF 错误:
[13270.589766] Device initialized (major number is 237)
[13656.862951] Device open <==== Open() in the main thread
[13656.863315] Device ioctl enter <==== 1st ioctl() in secondary thread
[13657.863523] Device flush <==== 1 s later, flush() = close() in the main thread
[13661.941238] Device ioctl out <==== 5 s later, the 1st ioctl() returns
[13661.941244] Device released <==== The file is released because the reference counter reached 0
在 Linux 内核设备驱动程序中,有一个 file_operations 结构(即 fops 结构),驱动程序可以通过它为各种文件操作定义处理程序。
我的问题是关于 .release fop 处理程序的。
我知道 release 处理程序只有在 file 对象的最后一个文件描述符 (fd) 被关闭(或取消映射)时才会被调用。这是在对 file 调用 fput 并且 file->f_count 降到 0 时完成的。
但是,我不清楚在进入 release 时,其他文件操作是否可能正在另一个线程中同时运行。
例如:
进程的一个线程是否可能处于 file(或 fd)的 ioctl 处理程序内,而同一进程的另一个线程正处于 release 处理程序内?
release 是否可能成为 file 对象竞争条件的一个因素?
could 1 thread of a process be inside the ioctl handler for the file (or fd), while another thread of the same process is inside of the release handler?
不会。release 入口点只有在文件条目的引用计数器为 0 时才会被调用。ioctl() 会增加文件的引用计数器。因此,当 ioctl() 正在执行时,不会调用 release 入口点。
前言
下面讨论的源代码是:
- GLIBC 2.31
- Linux 5.4
GLIBC 的 pthread 管理
GLIBC 的 pthread_create() 实际上会调用 clone() 系统调用,并带有以下标志:
CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID
根据 clone() 的手册,CLONE_FILES 标志使各线程共享同一个文件描述符表。由一个线程创建的任何文件描述符在其他线程中也有效。类似地,如果一个线程关闭文件描述符,或更改其关联标志(使用 fcntl() F_SETFD 操作),则其他线程也受到影响。
内核端的clone()
当clone()传CLONE_FILES时,files_struct 没有被复制,但是引用计数器增加了。因此,两个线程的任务结构都指向相同的 files_struct (files 字段):
。任务结构定义在include/linux/sched.h:
/* Excerpt of struct task_struct; "[...]" marks members left out by the author. */
struct task_struct {
[...]
/* Open file information: */
struct files_struct *files; /* <==== Table of open files, shared between threads cloned with CLONE_FILES */
[...]
。在 kernel/fork.c 中,clone() 服务调用 copy_files() 增加 files_struct 上的引用计数器:
static int copy_files(unsigned long clone_flags, struct task_struct *tsk)
{
/*
 * Body of copy_files(): with CLONE_FILES the current task's files_struct is
 * shared (only its reference count is incremented); otherwise dup_fd()
 * builds a separate table that is stored in tsk->files.
 * Returns 0 on success, or the error code left in 'error' by dup_fd().
 */
struct files_struct *oldf, *newf;
int error = 0;
/*
* A background process may not have any files ...
*/
oldf = current->files;
if (!oldf)
goto out;
if (clone_flags & CLONE_FILES) {
atomic_inc(&oldf->count); /* <==== Ref counter incremented: files_struct is shared */
goto out;
}
/* No sharing requested: duplicate the table; dup_fd() reports failure through 'error'. */
newf = dup_fd(oldf, &error);
if (!newf)
goto out;
tsk->files = newf;
error = 0;
out:
return error;
}
。 files_struct 定义在 include/linux/fdtable.h:
/*
 * Open file table structure
 *
 * One instance is shared by every task that was cloned with CLONE_FILES;
 * 'count' tracks how many tasks reference it.
 */
struct files_struct {
	/*
	 * read mostly part
	 */
	atomic_t count; /* <==== Reference counter */
	bool resize_in_progress;
	wait_queue_head_t resize_wait;
	struct fdtable __rcu *fdt;
	struct fdtable fdtab;
	/*
	 * written part on a separate cache line in SMP
	 */
	spinlock_t file_lock ____cacheline_aligned_in_smp;
	unsigned int next_fd;
	unsigned long close_on_exec_init[1];
	unsigned long open_fds_init[1];
	unsigned long full_fds_bits_init[1];
	struct file __rcu * fd_array[NR_OPEN_DEFAULT];
};
ioctl() 操作
ioctl() 系统调用定义在 fs/ioctl.c 中。它首先调用 fdget() 获取文件条目的引用(当文件描述符表被多个线程共享时,会增加文件条目的引用计数器),执行请求的操作,然后调用 fdput() 释放该引用:
/*
 * Kernel-side implementation of ioctl(2) for descriptor @fd.
 *
 * Returns -EBADF when @fd does not resolve to an open file. Otherwise returns
 * the error from security_file_ioctl() or, when that hook returns 0, the
 * result of do_vfs_ioctl().
 */
int ksys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
{
int error;
/* Resolve fd to its struct file; the reference obtained here is dropped by fdput() below. */
struct fd f = fdget(fd);
if (!f.file)
return -EBADF;
error = security_file_ioctl(f.file, cmd, arg);
if (!error)
error = do_vfs_ioctl(f.file, fd, cmd, arg);
/* Release the reference only after the ioctl work has finished. */
fdput(f);
return error;
}
/* ioctl system-call entry: forwards its three arguments unchanged to ksys_ioctl(). */
SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
{
return ksys_ioctl(fd, cmd, arg);
}
文件条目在include/linux/fs.h中定义。它的引用计数器是f_count字段:
/* Excerpt of struct file (the "file entry" of the text); "[...]" marks members left out by the author. */
struct file {
union {
struct llist_node fu_llist;
struct rcu_head fu_rcuhead;
} f_u;
struct path f_path;
struct inode *f_inode; /* cached value */
/* Handlers table of the driver/filesystem backing this file. */
const struct file_operations *f_op;
/*
* Protects f_ep_links, f_flags.
* Must not be taken from IRQ context.
*/
spinlock_t f_lock;
enum rw_hint f_write_hint;
atomic_long_t f_count; /* <===== Reference counter */
unsigned int f_flags;
[...]
} __randomize_layout
__attribute__((aligned(4)));
例子
这是一个简单的设备驱动程序,其中的文件操作仅在触发时显示一条消息。 ioctl() 条目使调用者休眠 5 秒:
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/kdev_t.h>
#include <linux/cdev.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/delay.h>
MODULE_LICENSE("GPL");
#define DEVICE_NAME "device"
/* Forward declarations of the handlers referenced by the fops table below. */
static int device_open(struct inode *, struct file *);
static int device_release(struct inode *, struct file *);
static ssize_t device_read(struct file *, char *, size_t, loff_t *);
static ssize_t device_write(struct file *, const char *, size_t, loff_t *);
static long int device_ioctl(struct file *, unsigned int, unsigned long);
static int device_flush(struct file *, fl_owner_t);
/*
 * Dispatch table of the demo device: every hook points at the matching
 * device_*() handler defined in this module.
 */
static const struct file_operations fops = {
	.owner          = THIS_MODULE,
	/* lifetime hooks */
	.open           = device_open,
	.flush          = device_flush,
	.release        = device_release,
	/* data and control hooks */
	.read           = device_read,
	.write          = device_write,
	.unlocked_ioctl = device_ioctl,
};
/* Character device object of the module; set up in init(), removed in cleanup(). */
static struct cdev *device_cdev;
/* Device number (major/minor) filled in by alloc_chrdev_region() in init(). */
static dev_t deviceNumbers;
/*
 * Module load hook: reserve one character-device number, then allocate and
 * register the cdev that routes file operations to fops.
 *
 * Returns 0 on success or a negative errno. On failure every resource
 * acquired so far is released again, so nothing stays registered.
 */
static int __init init(void)
{
	int ret;

	/* This returns the major number chosen dynamically in deviceNumbers */
	ret = alloc_chrdev_region(&deviceNumbers, 0, 1, DEVICE_NAME);
	if (ret < 0) {
		printk(KERN_ALERT "Error registering: %d\n", ret);
		return ret;
	}

	device_cdev = cdev_alloc();
	if (!device_cdev) {
		ret = -ENOMEM;
		printk(KERN_ALERT "Error allocating cdev: %d\n", ret);
		goto err_unregister;
	}

	/*
	 * cdev_alloc() hands back an already initialised object. Running
	 * cdev_init() on it would re-initialise the embedded kobject and lose
	 * the release path that frees the allocation, so only the fields are
	 * filled in here.
	 */
	device_cdev->owner = THIS_MODULE;
	device_cdev->ops = &fops;

	ret = cdev_add(device_cdev, deviceNumbers, 1);
	if (ret < 0) {
		printk(KERN_ALERT "Error adding cdev: %d\n", ret);
		goto err_put;
	}

	printk(KERN_INFO "Device initialized (major number is %d)\n", MAJOR(deviceNumbers));
	return 0;

err_put:
	/* Never went live: dropping the last kobject reference frees the cdev. */
	kobject_put(&device_cdev->kobj);
	device_cdev = NULL;
err_unregister:
	unregister_chrdev_region(deviceNumbers, 1);
	return ret;
}
/*
 * Module unload hook: undo init() in reverse order of acquisition.
 */
static void __exit cleanup(void)
{
	/*
	 * Remove the cdev before giving back its number range, so the device
	 * can no longer be opened through a range that is already released.
	 */
	cdev_del(device_cdev);
	unregister_chrdev_region(deviceNumbers, 1);
	printk(KERN_INFO "Device unloaded\n");
}
/* .open handler: only logs that the device was opened; always returns 0. */
static int device_open(struct inode *inode, struct file *file)
{
printk(KERN_INFO "Device open\n");
return 0;
}
/*
 * .flush handler: only logs the call; always returns 0.
 * In the demo run shown in this document it is logged when close() is called.
 */
static int device_flush(struct file *file, fl_owner_t id)
{
printk(KERN_INFO "Device flush\n");
return 0;
}
/*
 * .release handler: only logs the call; always returns 0.
 * In the demo run shown in this document it is logged after the pending
 * ioctl() has returned.
 */
static int device_release(struct inode *inode, struct file *file)
{
printk(KERN_INFO "Device released\n");
return 0;
}
/*
 * .write handler: logs the call and reports all @len bytes as written
 * without ever reading @buff.
 * NOTE(review): @buff is presumably a user-space pointer and would normally
 * carry the __user annotation - confirm before it is ever dereferenced.
 */
static ssize_t device_write(struct file *filp, const char *buff, size_t len, loff_t * off)
{
printk(KERN_INFO "Device write\n");
return len;
}
/*
 * .read handler: logs the call and returns 0 without writing to @buff.
 * NOTE(review): @buff is presumably a user-space pointer and would normally
 * carry the __user annotation - confirm before it is ever dereferenced.
 */
static ssize_t device_read(struct file *filp, char *buff, size_t len, loff_t * off)
{
printk(KERN_INFO "Device read\n");
return 0;
}
/*
 * .unlocked_ioctl handler: logs entry, sleeps for up to 5000 ms, logs exit
 * and returns 0. @ioctl_num and @ioctl_param are ignored.
 */
static long int device_ioctl(struct file *file, unsigned int ioctl_num, unsigned long ioctl_param)
{
printk(KERN_INFO "Device ioctl enter\n");
/* Return value is ignored, so an early wake-up is not reported to the caller. */
msleep_interruptible(5000);
printk(KERN_INFO "Device ioctl out\n");
return 0;
}
/* Register init() and cleanup() as the module's entry and exit functions. */
module_init(init);
module_exit(cleanup);
这是一个用户空间程序,包含主线程和辅助线程。主线程打开上面的设备,等待辅助线程启动(barrier),1 秒后关闭设备。与此同时,辅助线程在上述设备上调用 ioctl(),使其休眠 5 秒。然后它在退出前第二次调用 ioctl()。
预期的行为是:主线程关闭设备文件时,辅助线程正在运行 ioctl()。
#include <stdio.h>
#include <pthread.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <errno.h>
/* Descriptor of /dev/device: opened and closed by main(), used by entry(). */
static int dev_fd;
/* Two-party barrier on which main() and entry() meet before the test starts. */
static pthread_barrier_t barrier;
/*
 * Secondary thread body.
 *
 * Meets the main thread at the barrier, then issues two ioctl() calls on the
 * shared descriptor dev_fd and prints the result of each one.
 *
 * @arg is unused. Always returns NULL.
 */
void *entry(void *arg)
{
	int rc;

	(void)arg;

	printf("Thread running...\n");
	// Rendez-vous with main thread
	pthread_barrier_wait(&barrier);

	/*
	 * errno is only set on failure, so clear it before each call: the value
	 * printed next to a successful rc is then 0 and not a leftover.
	 */
	errno = 0;
	rc = ioctl(dev_fd, 0);
	printf("rc = %d, errno = %d\n", rc, errno);

	errno = 0;
	rc = ioctl(dev_fd, 0);
	printf("rc = %d, errno = %d\n", rc, errno);

	return NULL;
}
/*
 * Main thread: open the device, start the secondary thread, wait for it at
 * the barrier, close the descriptor one second later, then join the thread.
 *
 * Returns 0 on success and 1 when the device cannot be opened or the
 * barrier/thread cannot be created.
 */
int main(void)
{
	pthread_t tid;
	int rc;

	dev_fd = open("/dev/device", O_RDWR);
	if (dev_fd < 0) {
		perror("open /dev/device");
		return 1;
	}

	rc = pthread_barrier_init(&barrier, NULL, 2);
	if (rc != 0) {
		fprintf(stderr, "pthread_barrier_init failed: %d\n", rc);
		close(dev_fd);
		return 1;
	}

	/* Without this check a failed create would leave main stuck on the barrier. */
	rc = pthread_create(&tid, NULL, entry, NULL);
	if (rc != 0) {
		fprintf(stderr, "pthread_create failed: %d\n", rc);
		pthread_barrier_destroy(&barrier);
		close(dev_fd);
		return 1;
	}

	pthread_barrier_wait(&barrier);
	/* Give the secondary thread time to enter its first ioctl() before closing. */
	sleep(1);
	close(dev_fd);

	pthread_join(tid, NULL);
	pthread_barrier_destroy(&barrier);
	return 0;
}
安装内核模块:
$ sudo insmod ./device.ko
$ dmesg
[13270.589766] Device initialized (major number is 237)
$ sudo mknod /dev/device c 237 0
$ sudo chmod 666 /dev/device
$ ls -l /dev/device
crw-rw-rw- 1 root root 237, 0 janv. 27 10:55 /dev/device
程序的执行表明,第一个 ioctl() 让线程等待了 5 秒。但是第二个 ioctl() 返回 EBADF (9) 错误,因为在此期间设备文件已被主线程关闭:
$ gcc p1.c -lpthread
$ ./a.out
Thread running...
rc = 0, errno = 0
rc = -1, errno = 9
在内核日志中,我们可以看到:当第一个 ioctl() 还在辅助线程中运行时,主线程中的 close() 只在设备上触发了一次 flush() 操作。然后,一旦第一个 ioctl() 返回,内核才在内部释放文件条目(引用计数器降为 0)。第二个 ioctl() 没有到达设备,因为文件描述符已不再引用打开的文件,所以第二次调用返回 EBADF 错误:
[13270.589766] Device initialized (major number is 237)
[13656.862951] Device open <==== Open() in the main thread
[13656.863315] Device ioctl enter <==== 1st ioctl() in secondary thread
[13657.863523] Device flush <==== 1 s later, flush() = close() in the main thread
[13661.941238] Device ioctl out <==== 5 s later, the 1st ioctl() returns
[13661.941244] Device released <==== The file is released because the reference counter reached 0