内核块设备 - 使用自旋锁陷入死锁

Kernel block device - Falling in deadlock using spinlock

我刚刚实现了一个虚拟块设备,我想用它来探索 linux 内核如何处理块设备。

我的设备只是一个内存区域,分为两个 512 字节的扇区。

我正在使用全局结构来存储设备信息:

typedef struct
{
    uint32_t hard_sector_size;        // Size of a device sector
    uint32_t sector_number;           // Number of sector on device
    uint32_t size;                    // Total size of virtual device in bytes
    uint8_t* data;                    // Device memory buffer
    spinlock_t device_lock;           // Device structure access spinlock
    struct request_queue *queue;      // Device request queue
    struct gendisk *gendisk;          // Device "disk" representation
    int major;                        // Device major number attributed by kernel
    int minor;                        // Device minor number fixed at initialization
    uint32_t r_users;                 // Number of read access
    uint32_t w_users;                 // Number of write access
}blk_mod_t;

blk_mod_t self;

[...]

现在我想保护这个结构免受并发访问。为此,我使用了 device_lock 字段。
如果锁定,结构正在更新,所以我应该等到完成。
如果没有被占用,我可以访问结构字段。

现在我只在以下三个函数中使用这个自旋锁

static int block_mod_open(struct block_device *bdev, fmode_t mode)
{
    access_mode_t access_mode;
    DEBUG("Entering open function\n");

    if((mode & FMODE_READ) && (mode & FMODE_WRITE))
    {
        NOTICE("Oppened in read/write mode\n");
        mode = ACCESS_RW;
    }
    else if(mode & FMODE_READ)
    {
        NOTICE("Oppened in read only mode\n");
        mode = ACCESS_RONLY;
    }
    else if(mode & FMODE_WRITE)
    {
        NOTICE("Oppened in write only mode\n");
        mode = ACCESS_WONLY;
    }

    DEBUG("<--\n");
    spin_lock(&self.device_lock);

    if(ACCESS_RW == access_mode)
    {
        self.r_users++;
        self.w_users++;
    }
    else if(ACCESS_RONLY == access_mode)
    {
        self.r_users++;
    }
    else
    {
        self.w_users++;
    }
    NOTICE("Read access: %d\tWrite access: %d\n", self.r_users, self.w_users);

    DEBUG("-->\n");
    spin_unlock(&self.device_lock);

    DEBUG("Exiting open function\n");
    return 0;
}

static void block_mod_release(struct gendisk *disk, fmode_t mode)
{
    access_mode_t access_mode;
    DEBUG("Entering release function\n");

    if((mode & FMODE_READ) && (mode & FMODE_WRITE))
    {
        NOTICE("Closed read/write mode\n");
        mode = ACCESS_RW;
    }
    else if(mode & FMODE_READ)
    {
        NOTICE("Closed read only mode\n");
        mode = ACCESS_RONLY;
    }
    else if(mode & FMODE_WRITE)
    {
        NOTICE("Closed write only mode\n");
        mode = ACCESS_WONLY;
    }

    DEBUG("<--\n");
    spin_lock(&self.device_lock);

    if(ACCESS_RW == access_mode)
    {
        self.r_users--;
        self.w_users--;
    }
    else if(ACCESS_RONLY == access_mode)
    {
        self.r_users--;
    }
    else
    {
        self.w_users--;
    }
    NOTICE("Read access: %d\tWrite access: %d\n", self.r_users, self.w_users);

    DEBUG("-->\n");
    spin_unlock(&self.device_lock);

    DEBUG("Exiting release function\n");
    return;
}

static void block_mod_transfer(unsigned long sector, unsigned long nsect, char *buffer, int write)
{
    unsigned long offset = sector*KERNEL_SECTOR_SIZE;
    unsigned long nbytes = nsect*KERNEL_SECTOR_SIZE;
    DEBUG("Entering transfer function\n");
    DEBUG("<--\n");
    spin_lock(&self.device_lock);

    if((offset + nbytes) > self.size) {
        WARNING("Beyond-end write (%ld %ld)\n", offset, nbytes);
        spin_unlock(&self.device_lock);
        return;
    }
    if(write)
    {
        NOTICE("Writing to device\n");
        memcpy(self.data + offset, buffer, nbytes);
    }
    else
    {
        NOTICE("Reading from device\n");
        memcpy(buffer, self.data + offset, nbytes);
    }

    DEBUG("-->\n");
    spin_unlock(&self.device_lock);
    DEBUG("Exiting transfer function\n");
}

我正在使用以下函数处理请求

static void block_mod_request(struct request_queue *queue)
{
    DEBUG("Entering request function\n");
    struct request *request;

    while(NULL != (request = blk_fetch_request(queue)))
    {
        blk_mod_t *self = request->rq_disk->private_data;
        // Check if request is a filesystem request (i.e. moves block of data)
        if(REQ_TYPE_FS != request->cmd_type)
        {
            // Close request with unsuccessful status
            WARNING("Skip non-fs request\n");
            __blk_end_request_cur(request, -EIO);
            continue;
        }
        // Treat request
        block_mod_transfer(blk_rq_pos(request), blk_rq_cur_sectors(request), bio_data(request->bio), rq_data_dir(request));
        // Close request with successful status
        __blk_end_request_cur(request, 0);
    }

    DEBUG("Exiting request function\n");
    return;
}

当我加载模块时,没有发生任何特殊情况。但是,如果我尝试读取它,我就会陷入僵局,因为我的系统不再响应,我必须重新启动。

这是输出:

root@PC325:~# echo 8 > /proc/sys/kernel/printk
root@PC325:~# insmod block_mod.ko 
[   64.546791] block_mod: loading out-of-tree module taints kernel.
[   64.548197] block_mod: module license '(c) Test license' taints kernel.
[   64.549951] Disabling lock debugging due to kernel taint
[   64.552816] Inserting module 'blk_mod_test'
[   64.554085] Got major number : '254'
[   64.554940] Data allocated (size = 1024)
[   64.557378] Request queue initialized
[   64.558178] Sent hard sector size to request queue
[   64.559188] Gendisk allocated
[   64.559817] Gendisk filled
[   64.560416] Gendisk capacity set
[   64.563285] Gendisk added
root@PC325:~# [   64.565280] Entering open function
[   64.566035] Oppened in read only mode
[   64.566773] <--
[   64.567138] Read access: 1   Write access: 0
[   64.567977] -->
[   64.568342] Exiting open function
[   64.571080] Entering release function
[   64.571855] Closed read only mode
[   64.572531] <--
[   64.572924] Read access: 0   Write access: 0
[   64.573749] -->
[   64.574116] Exiting release function
root@PC325:~# cat /dev/blkmodtest 
[   78.488228] Entering open function
[   78.488988] Oppened in read only mode
[   78.489733] <--
[   78.490100] Read access: 1   Write access: 0
[   78.490925] -->
[   78.491290] Exiting open function
[   78.492026] Entering request function
[   78.492743] Entering transfer function
[   78.493469] <--
-------------- DEADLOCK HERE --------------

更新: 添加初始化和退出函数

static int __init block_mod_init(void)
{
    char* message = "abcdefghijklmnopqrstuvwxyz";
    int i;

    INFO("Inserting module '%s'\n", MODULE_NAME);

    // Initialize driver data structure
    memset(&self, 0, sizeof(blk_mod_t));
    self.hard_sector_size = DEVICE_HARD_SECTOR_SIZE;
    self.sector_number = DEVICE_SECTOR_NUMBER;
    self.size = self.sector_number*self.hard_sector_size;
    self.minor = 1;

    // Get a major number from kernel
    if(0 > (self.major = register_blkdev(self.major, MODULE_NAME)))
    {
        ERROR("Unable to get major number for '%s'\n", MODULE_NAME);
        unregister_blkdev(self.major, MODULE_NAME);
        return -1;
    }
    DEBUG("Got major number : '%d'\n", self.major);

    // Allocate data space
    if(NULL == (self.data = vmalloc(self.size)))
    {
        ERROR("Unable to allocate memory for '%s'\n", MODULE_NAME);
        unregister_blkdev(self.major, MODULE_NAME);
        return -2;
    }
    for(i=0;i<self.size;i++)
    {
        self.data[i] = message[i%strlen(message)];
    }
    spin_lock_init(&self.device_lock);
    DEBUG("Data allocated (size = %d)\n", self.size);

    // Allocate the request queue
    if(NULL == (self.queue = blk_init_queue(block_mod_request, &self.device_lock)))
    {
        ERROR("Unable to initialize request queue for '%s'\n", MODULE_NAME);
        vfree(self.data);
        unregister_blkdev(self.major, MODULE_NAME);
        return -3;
    }
    DEBUG("Request queue initialized\n");

    // Send device hard sector size to request queue
    blk_queue_logical_block_size(self.queue, self.hard_sector_size);
    self.queue->queuedata = &self;
    DEBUG("Sent hard sector size to request queue\n");

    // Allocate the gendisk structure
    if(NULL == (self.gendisk = alloc_disk(self.minor)))
    {
        ERROR("Unable to initialize gendisk for '%s'\n", MODULE_NAME);
        blk_cleanup_queue(self.queue);
        vfree(self.data);
        unregister_blkdev(self.major, MODULE_NAME);
        return -4;
    }
    DEBUG("Gendisk allocated\n");

    // Fill gendisk structure
    self.gendisk->major = self.major;
    self.gendisk->first_minor = self.minor;
    self.gendisk->fops = &self_ops;
    self.gendisk->queue = self.queue;
    self.gendisk->private_data = &self;
    snprintf(self.gendisk->disk_name, 32, "blkmodtest");
    DEBUG("Gendisk filled\n");
    set_capacity(self.gendisk, self.sector_number*(self.hard_sector_size/KERNEL_SECTOR_SIZE));
    DEBUG("Gendisk capacity set\n");
    add_disk(self.gendisk);
    DEBUG("Gendisk added\n");

    return 0;
}

static void __exit block_mod_cleanup(void)
{
    del_gendisk(self.gendisk);
    put_disk(self.gendisk);
    blk_cleanup_queue(self.queue);
    vfree(self.data);
    unregister_blkdev(self.major, MODULE_NAME);

    INFO("Removing module '%s'\n", MODULE_NAME);
    return;
}

更新: 添加宏和枚举定义

#define MODULE_NAME                    "blk_mod_test"
#define KERNEL_SECTOR_SIZE             512
#define DEVICE_HARD_SECTOR_SIZE        512
#define DEVICE_SECTOR_NUMBER           2

typedef enum
{
    ACCESS_RONLY = 0,
    ACCESS_WONLY = 1,
    ACCESS_RW    = 2,
}access_mode_t;

我不明白的是,在我尝试将其锁定到 block_mod_transfer.

之前,自旋锁已被释放(在 block_mod_open 末尾)

所以我不明白为什么内核在似乎可以使用自旋锁时陷入死锁。

为什么我会在这种情况下陷入僵局?我做错了什么?

感谢@CraigEstey 的评论,我终于发现问题出在请求队列使用与我的设备结构相同的自旋锁初始化。

// Allocate the request queue
if(NULL == (self.queue = blk_init_queue(block_mod_request, &self.device_lock)))
{
    ERROR("Unable to initialize request queue for '%s'\n", MODULE_NAME);
    vfree(self.data);
    unregister_blkdev(self.major, MODULE_NAME);
    return -3;
}

所以当调用请求队列的回调函数(即block_mod_request)时,自旋锁已经被持有,我陷入了死锁。