内核模块重新加载后中断处理程序停止工作 (Xilinx FPGA / PCIe)
Interrupt Handler Stops Working After Kernel Module Reload (Xilinx FPGA / PCIe)
我目前正在使用 Xilinx PCI IP 内核(AXI 内存映射到 PCIe)为 Xilinx Kintex 7 板开发 PCI 驱动程序。
一个问题是,当我重新加载内核模块时,中断处理程序停止工作。更详细:
- 我的机器全新启动
- 加载内核模块并使用 dmesg 监视内核消息
- /proc/interrupts 显示预期的中断 ID
- 我触发了硬件中断,一切正常;我可以看到中断处理程序正在工作。
- 执行 rmmod my_module,/proc/interrupts 中的中断 ID 按预期被删除
- 执行 insmod my_module 并触发中断
- 现在中断处理程序是静默的,/proc/interrupts 中的计数器不再增加
我重新启动了我的机器,一切都恢复正常了。我不必重新启动 FPGA 这一事实让我假设我在内核模块中做错了什么,它可能不是硬件问题。
我已经尝试使用 /sys/bus/pci/devices/.../reset、/sys/bus/pci/devices/.../remove 和 /sys/bus/pci/rescan 来达到相当于新启动机器的状态,但是没有任何效果。
相关模块代码:
/* PCI vendor/device ID pair this driver binds to. */
#define VENDOR_ID 0x10EE
#define DEVICE_ID 0x7024

/* Character-device bookkeeping shared by module init and exit. */
static dev_t pci_dev_number;
static struct cdev *driver_object;
static struct class *pci_class;
static struct device *pci_prc;
static struct device *pci_irq_0;
static struct device *pci_irq_1;

/* Number of MSI interrupts requested from the PCI core. */
static int msi_vec_num = 2;

/* Linux IRQ numbers of MSI vectors 0 and 1; -1 until probe assigns them. */
static int msi_0 = -1;
static int msi_1 = -1;

/* Wait queues woken by the two interrupt handlers (used for poll and select). */
static DECLARE_WAIT_QUEUE_HEAD(queue_vs0);
static DECLARE_WAIT_QUEUE_HEAD(queue_vs1);
/*
 * pci_isr_0 - interrupt handler registered for MSI vector 0.
 * @irq:    Linux IRQ number that fired (only logged).
 * @dev_id: cookie passed to request_irq(); unused here.
 *
 * Logs the event and wakes any task sleeping on queue_vs0.
 * Always reports the interrupt as handled.
 */
static irqreturn_t pci_isr_0(int irq, void *dev_id)
{
	printk(KERN_NOTICE "codec IRQ: interrupt handler 0. IRQ: %d\n", irq);
	wake_up_interruptible(&queue_vs0);

	return IRQ_HANDLED;
}
/*
 * pci_isr_1 - interrupt handler registered for MSI vector 1.
 * @irq:    Linux IRQ number that fired (only logged).
 * @dev_id: cookie passed to request_irq(); unused here.
 *
 * Logs the event and wakes any task sleeping on queue_vs1.
 * Always reports the interrupt as handled.
 */
static irqreturn_t pci_isr_1(int irq, void *dev_id)
{
	printk(KERN_NOTICE "codec IRQ: interrupt handler 1. IRQ: %d\n", irq);
	wake_up_interruptible(&queue_vs1);

	return IRQ_HANDLED;
}
static void* bars[PCIE_BARS] = {0};
static int device_init(struct pci_dev * pdev, const struct pci_device_id * id) {
int i = 0; // loop var
if (pci_enable_device(pdev))
return -EIO;
// Request memory regions for bar 0 to 2
for (i = 0; i < PCIE_BARS; i++) {
if (pci_request_region(pdev, i, "codec_pci") != 0) {
dev_err( & pdev - > dev, "Bar %d - I/O address conflict for device \"%s\"\n", i, pdev - > dev.kobj.name);
return -EIO;
}
}
// DEBUG: Check if we are in memory space (which we should) or io space
if ((pci_resource_flags(pdev, 0) & IORESOURCE_IO)) {
printk(KERN_NOTICE "codec INIT: in io space\n");
} else if ((pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
printk(KERN_NOTICE "codec INIT: in mem_space\n");
}
// This request enables MSI_enable in the hardware
msi_vec_num = pci_alloc_irq_vectors(pdev, 1, msi_vec_num, PCI_IRQ_MSI);
// msi_N will contain the IRQ number - see /proc/interrupts
msi_0 = pci_irq_vector(pdev, 0);
msi_1 = pci_irq_vector(pdev, 1);
printk(KERN_NOTICE "codec INIT: nvec: %d\n", msi_vec_num);
printk(KERN_NOTICE "codec INIT: msi_0: %d\n", msi_0);
printk(KERN_NOTICE "codec INIT: msi_1: %d\n", msi_1);
if (request_irq(msi_0, pci_isr_0, IRQF_SHARED, "codec_pci", pdev)) {
dev_err( & pdev - > dev, "codec INIT: IRQ MSI %d not free.\n", msi_0);
goto cleanup;
};
if (request_irq(msi_1, pci_isr_1, IRQF_SHARED, "codec_pci", pdev)) {
dev_err( & pdev - > dev, "codec INIT: IRQ MSI %d not free.\n", msi_1);
goto cleanup;
};
for (i = 0; i < PCIE_BARS; i++) {
// Last parameter is the address space/length of each bar. Defined in the PCIe core.
bars[i] = pci_iomap(pdev, i, pci_resource_len(pdev, i));
if (bars[i] == NULL) {
printk(KERN_ERR "codec INIT: bar %d allocation failed\n", i);
goto cleanup;
}
printk(KERN_NOTICE "codec INIT: bar %d pointer: %p\n", i, bars[i]);
}
printk(KERN_NOTICE "codec INIT: loaded\n");
return 0;
cleanup:
for (i = 0; i < PCIE_BARS; i++) {
if (bars[i] != NULL)
pci_iounmap(pdev, bars[i]);
pci_release_region(pdev, i);
}
return -EIO;
}
/*
 * device_deinit - PCI remove callback.
 * @pdev: PCI device being unbound.
 *
 * Releases everything the probe callback acquired, in reverse order:
 * interrupt handlers, MSI vectors, BAR mappings, BAR regions, and finally
 * the device itself. The module-level state (msi_0, msi_1, bars[]) is reset
 * so that a later probe or its error path never acts on stale values.
 */
static void device_deinit(struct pci_dev *pdev)
{
	int i;

	if (msi_0 >= 0) {
		free_irq(msi_0, pdev);
		msi_0 = -1;
	}
	if (msi_1 >= 0) {
		free_irq(msi_1, pdev);
		msi_1 = -1;
	}
	pci_free_irq_vectors(pdev);

	/* Drop the mappings before giving up ownership of the regions. */
	for (i = 0; i < PCIE_BARS; i++) {
		if (bars[i] != NULL) {
			pci_iounmap(pdev, bars[i]);
			bars[i] = NULL;
		}
	}

	/* Release the BAR regions. */
	for (i = 0; i < PCIE_BARS; i++)
		pci_release_region(pdev, i);

	pci_disable_device(pdev);
}
/* File operations of the character devices; the handlers are not part of this snippet. */
static struct file_operations fops = {
	.owner = THIS_MODULE,
	.open  = device_open,
	.read  = device_read,
	.write = device_write,
	.poll  = device_poll,
};
/* PCI IDs handled by this driver; the zeroed entry terminates the table. */
static struct pci_device_id pci_drv_tbl[] = {
	{
		.vendor      = VENDOR_ID,
		.device      = DEVICE_ID,
		.subvendor   = PCI_ANY_ID,
		.subdevice   = PCI_ANY_ID,
		.class       = 0,
		.class_mask  = 0,
		.driver_data = 0,
	},
	{ 0, },
};
/* Driver descriptor registered with the PCI core: ID table plus probe/remove callbacks. */
static struct pci_driver pci_drv = {
	.name     = "codec_pci",
	.id_table = pci_drv_tbl,
	.probe    = device_init,
	.remove   = device_deinit,
};
/*
 * mod_init - module load entry point.
 *
 * Reserves the character-device number range, registers the cdev, creates
 * the device class and its three device nodes (codec_prc, codec_irq_0,
 * codec_irq_1) and finally registers the PCI driver.
 *
 * Returns 0 on success and -EIO on any failure; on failure every step that
 * already succeeded is undone.
 */
static int __init mod_init(void)
{
	int i;

	if (alloc_chrdev_region(&pci_dev_number, 0, MAX_DEVICES, "codec_pci") < 0)
		return -EIO;

	driver_object = cdev_alloc();
	if (driver_object == NULL)
		goto free_dev_number;
	driver_object->owner = THIS_MODULE;
	driver_object->ops = &fops;

	if (cdev_add(driver_object, pci_dev_number, MAX_DEVICES)) {
		/* The cdev was never added, so only drop the allocation reference. */
		kobject_put(&driver_object->kobj);
		goto free_dev_number;
	}

	pci_class = class_create(THIS_MODULE, "codec_pci");
	if (IS_ERR(pci_class)) {
		pr_err("codec MOD_INIT: no udev support available\n");
		goto del_cdev;
	}

	pci_prc = device_create(pci_class, NULL, MKDEV(MAJOR(pci_dev_number), MINOR(pci_dev_number) + 0), NULL, "%s", "codec_prc");
	pci_irq_0 = device_create(pci_class, NULL, MKDEV(MAJOR(pci_dev_number), MINOR(pci_dev_number) + 1), NULL, "codec_irq_%d", 0);
	pci_irq_1 = device_create(pci_class, NULL, MKDEV(MAJOR(pci_dev_number), MINOR(pci_dev_number) + 2), NULL, "codec_irq_%d", 1);
	if (IS_ERR(pci_prc) || IS_ERR(pci_irq_0) || IS_ERR(pci_irq_1)) {
		pr_err("codec MOD_INIT: device node creation failed\n");
		goto destroy_devices;
	}

	if (pci_register_driver(&pci_drv) < 0)
		goto destroy_devices;

	return 0;

destroy_devices:
	/*
	 * pci_dev_number already is a complete dev_t, so the device numbers
	 * must be rebuilt from its major and minor parts. device_destroy() is
	 * looked up by number, so numbers without a device are skipped.
	 */
	for (i = 0; i < MAX_DEVICES; i++)
		device_destroy(pci_class, MKDEV(MAJOR(pci_dev_number), MINOR(pci_dev_number) + i));
	class_destroy(pci_class);
del_cdev:
	cdev_del(driver_object);
free_dev_number:
	unregister_chrdev_region(pci_dev_number, MAX_DEVICES);
	return -EIO;
}
/*
 * mod_exit - module unload entry point.
 *
 * Undoes mod_init in reverse order: unregisters the PCI driver, destroys
 * the device nodes and their class, removes the cdev and releases the
 * character-device number range.
 */
static void __exit mod_exit(void)
{
	int i;

	pci_unregister_driver(&pci_drv);

	/*
	 * Destroy the nodes by device number instead of through the stored
	 * pointers. pci_dev_number already is a complete dev_t, so each number
	 * is rebuilt from its major and minor parts.
	 */
	for (i = 0; i < MAX_DEVICES; i++)
		device_destroy(pci_class, MKDEV(MAJOR(pci_dev_number), MINOR(pci_dev_number) + i));

	class_destroy(pci_class);
	cdev_del(driver_object);
	unregister_chrdev_region(pci_dev_number, MAX_DEVICES);
}
// Register mod_init and mod_exit as the module's load and unload entry points.
module_init(mod_init);
module_exit(mod_exit);
错误处理例程可能会更好,但无论如何它们都不会被触发。
我想我找到了问题的原因。我在执行原始 post 的每个步骤时查看了 PCI configuration space。中断工作时的配置space:
# lspci -xxx | grep Xilinx
23:00.0 Memory controller: Xilinx Corporation Device 7024
00: ee 10 24 70 *07* 04 10 00 00 00 80 05 10 00 00 00
. . .
当它损坏时:
# lspci -xxx | grep Xilinx
23:00.0 Memory controller: Xilinx Corporation Device 7024
00: ee 10 24 70 *03* 04 10 00 00 00 80 05 10 00 00 00
. . .
我发现命令寄存器(command register)的值在重新加载内核模块后发生了变化(用 * 标记)。中断正常工作时命令寄存器的值是 0x0407,重新加载模块后变为 0x0403。为什么?我不知道。这可能只是 Xilinx AXI Memory Mapped to PCIe 核心的实现方式。(注:0x0407 与 0x0403 的差别是第 2 位,即 Bus Master Enable;MSI 是由设备发起的内存写操作,需要该位置 1。)
无论如何,您可以使用 setpci(8) 设置 PCI 配置空间中的值。
命令寄存器的期望值是 0407,所以执行:
# setpci -d <vendor_id>:<device_id> command=0407
#read back to check if it worked
# sudo setpci -d <vendor_id>:<device_id> command
0407
之后中断又正常了,我不需要重启。
在内核模块中,您可以使用例如 pci_write_config_byte(...) 将命令寄存器(或任何其他寄存器)设置为所需的值(对于 Bus Master 位,更常规的做法是在 probe 函数中调用 pci_set_master())。可以在此处找到访问配置空间的相应函数:Linux Device Drivers - Accessing the Configuration Space
我目前正在使用 Xilinx PCI IP 内核(AXI 内存映射到 PCIe)为 Xilinx Kintex 7 板开发 PCI 驱动程序。 一个问题是,当我重新加载内核模块时,中断处理程序停止工作。更详细:
- 我的机器全新启动
- 加载内核模块并使用 dmesg 监视内核消息
- /proc/interrupts 显示预期的中断 ID
- 我触发了硬件中断,一切正常;我可以看到中断处理程序正在工作。
- 执行 rmmod my_module,/proc/interrupts 中的中断 ID 按预期被删除
- 执行 insmod my_module 并触发中断
- 现在中断处理程序是静默的,/proc/interrupts 中的计数器不再增加
我重新启动了我的机器,一切都恢复正常了。我不必重新启动 FPGA 这一事实让我假设我在内核模块中做错了什么,它可能不是硬件问题。
我已经尝试使用 /sys/bus/pci/devices/.../reset、/sys/bus/pci/devices/.../remove 和 /sys/bus/pci/rescan 来达到相当于新启动机器的状态,但是没有任何效果。
相关模块代码:
/* PCI vendor/device ID pair this driver binds to. */
#define VENDOR_ID 0x10EE
#define DEVICE_ID 0x7024

/* Character-device bookkeeping shared by module init and exit. */
static dev_t pci_dev_number;
static struct cdev *driver_object;
static struct class *pci_class;
static struct device *pci_prc;
static struct device *pci_irq_0;
static struct device *pci_irq_1;

/* Number of MSI interrupts requested from the PCI core. */
static int msi_vec_num = 2;

/* Linux IRQ numbers of MSI vectors 0 and 1; -1 until probe assigns them. */
static int msi_0 = -1;
static int msi_1 = -1;

/* Wait queues woken by the two interrupt handlers (used for poll and select). */
static DECLARE_WAIT_QUEUE_HEAD(queue_vs0);
static DECLARE_WAIT_QUEUE_HEAD(queue_vs1);
/*
 * pci_isr_0 - interrupt handler registered for MSI vector 0.
 * @irq:    Linux IRQ number that fired (only logged).
 * @dev_id: cookie passed to request_irq(); unused here.
 *
 * Logs the event and wakes any task sleeping on queue_vs0.
 * Always reports the interrupt as handled.
 */
static irqreturn_t pci_isr_0(int irq, void *dev_id)
{
	printk(KERN_NOTICE "codec IRQ: interrupt handler 0. IRQ: %d\n", irq);
	wake_up_interruptible(&queue_vs0);

	return IRQ_HANDLED;
}
/*
 * pci_isr_1 - interrupt handler registered for MSI vector 1.
 * @irq:    Linux IRQ number that fired (only logged).
 * @dev_id: cookie passed to request_irq(); unused here.
 *
 * Logs the event and wakes any task sleeping on queue_vs1.
 * Always reports the interrupt as handled.
 */
static irqreturn_t pci_isr_1(int irq, void *dev_id)
{
	printk(KERN_NOTICE "codec IRQ: interrupt handler 1. IRQ: %d\n", irq);
	wake_up_interruptible(&queue_vs1);

	return IRQ_HANDLED;
}
static void* bars[PCIE_BARS] = {0};
static int device_init(struct pci_dev * pdev, const struct pci_device_id * id) {
int i = 0; // loop var
if (pci_enable_device(pdev))
return -EIO;
// Request memory regions for bar 0 to 2
for (i = 0; i < PCIE_BARS; i++) {
if (pci_request_region(pdev, i, "codec_pci") != 0) {
dev_err( & pdev - > dev, "Bar %d - I/O address conflict for device \"%s\"\n", i, pdev - > dev.kobj.name);
return -EIO;
}
}
// DEBUG: Check if we are in memory space (which we should) or io space
if ((pci_resource_flags(pdev, 0) & IORESOURCE_IO)) {
printk(KERN_NOTICE "codec INIT: in io space\n");
} else if ((pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
printk(KERN_NOTICE "codec INIT: in mem_space\n");
}
// This request enables MSI_enable in the hardware
msi_vec_num = pci_alloc_irq_vectors(pdev, 1, msi_vec_num, PCI_IRQ_MSI);
// msi_N will contain the IRQ number - see /proc/interrupts
msi_0 = pci_irq_vector(pdev, 0);
msi_1 = pci_irq_vector(pdev, 1);
printk(KERN_NOTICE "codec INIT: nvec: %d\n", msi_vec_num);
printk(KERN_NOTICE "codec INIT: msi_0: %d\n", msi_0);
printk(KERN_NOTICE "codec INIT: msi_1: %d\n", msi_1);
if (request_irq(msi_0, pci_isr_0, IRQF_SHARED, "codec_pci", pdev)) {
dev_err( & pdev - > dev, "codec INIT: IRQ MSI %d not free.\n", msi_0);
goto cleanup;
};
if (request_irq(msi_1, pci_isr_1, IRQF_SHARED, "codec_pci", pdev)) {
dev_err( & pdev - > dev, "codec INIT: IRQ MSI %d not free.\n", msi_1);
goto cleanup;
};
for (i = 0; i < PCIE_BARS; i++) {
// Last parameter is the address space/length of each bar. Defined in the PCIe core.
bars[i] = pci_iomap(pdev, i, pci_resource_len(pdev, i));
if (bars[i] == NULL) {
printk(KERN_ERR "codec INIT: bar %d allocation failed\n", i);
goto cleanup;
}
printk(KERN_NOTICE "codec INIT: bar %d pointer: %p\n", i, bars[i]);
}
printk(KERN_NOTICE "codec INIT: loaded\n");
return 0;
cleanup:
for (i = 0; i < PCIE_BARS; i++) {
if (bars[i] != NULL)
pci_iounmap(pdev, bars[i]);
pci_release_region(pdev, i);
}
return -EIO;
}
/*
 * device_deinit - PCI remove callback.
 * @pdev: PCI device being unbound.
 *
 * Releases everything the probe callback acquired, in reverse order:
 * interrupt handlers, MSI vectors, BAR mappings, BAR regions, and finally
 * the device itself. The module-level state (msi_0, msi_1, bars[]) is reset
 * so that a later probe or its error path never acts on stale values.
 */
static void device_deinit(struct pci_dev *pdev)
{
	int i;

	if (msi_0 >= 0) {
		free_irq(msi_0, pdev);
		msi_0 = -1;
	}
	if (msi_1 >= 0) {
		free_irq(msi_1, pdev);
		msi_1 = -1;
	}
	pci_free_irq_vectors(pdev);

	/* Drop the mappings before giving up ownership of the regions. */
	for (i = 0; i < PCIE_BARS; i++) {
		if (bars[i] != NULL) {
			pci_iounmap(pdev, bars[i]);
			bars[i] = NULL;
		}
	}

	/* Release the BAR regions. */
	for (i = 0; i < PCIE_BARS; i++)
		pci_release_region(pdev, i);

	pci_disable_device(pdev);
}
/* File operations of the character devices; the handlers are not part of this snippet. */
static struct file_operations fops = {
	.owner = THIS_MODULE,
	.open  = device_open,
	.read  = device_read,
	.write = device_write,
	.poll  = device_poll,
};
/* PCI IDs handled by this driver; the zeroed entry terminates the table. */
static struct pci_device_id pci_drv_tbl[] = {
	{
		.vendor      = VENDOR_ID,
		.device      = DEVICE_ID,
		.subvendor   = PCI_ANY_ID,
		.subdevice   = PCI_ANY_ID,
		.class       = 0,
		.class_mask  = 0,
		.driver_data = 0,
	},
	{ 0, },
};
/* Driver descriptor registered with the PCI core: ID table plus probe/remove callbacks. */
static struct pci_driver pci_drv = {
	.name     = "codec_pci",
	.id_table = pci_drv_tbl,
	.probe    = device_init,
	.remove   = device_deinit,
};
/*
 * mod_init - module load entry point.
 *
 * Reserves the character-device number range, registers the cdev, creates
 * the device class and its three device nodes (codec_prc, codec_irq_0,
 * codec_irq_1) and finally registers the PCI driver.
 *
 * Returns 0 on success and -EIO on any failure; on failure every step that
 * already succeeded is undone.
 */
static int __init mod_init(void)
{
	int i;

	if (alloc_chrdev_region(&pci_dev_number, 0, MAX_DEVICES, "codec_pci") < 0)
		return -EIO;

	driver_object = cdev_alloc();
	if (driver_object == NULL)
		goto free_dev_number;
	driver_object->owner = THIS_MODULE;
	driver_object->ops = &fops;

	if (cdev_add(driver_object, pci_dev_number, MAX_DEVICES)) {
		/* The cdev was never added, so only drop the allocation reference. */
		kobject_put(&driver_object->kobj);
		goto free_dev_number;
	}

	pci_class = class_create(THIS_MODULE, "codec_pci");
	if (IS_ERR(pci_class)) {
		pr_err("codec MOD_INIT: no udev support available\n");
		goto del_cdev;
	}

	pci_prc = device_create(pci_class, NULL, MKDEV(MAJOR(pci_dev_number), MINOR(pci_dev_number) + 0), NULL, "%s", "codec_prc");
	pci_irq_0 = device_create(pci_class, NULL, MKDEV(MAJOR(pci_dev_number), MINOR(pci_dev_number) + 1), NULL, "codec_irq_%d", 0);
	pci_irq_1 = device_create(pci_class, NULL, MKDEV(MAJOR(pci_dev_number), MINOR(pci_dev_number) + 2), NULL, "codec_irq_%d", 1);
	if (IS_ERR(pci_prc) || IS_ERR(pci_irq_0) || IS_ERR(pci_irq_1)) {
		pr_err("codec MOD_INIT: device node creation failed\n");
		goto destroy_devices;
	}

	if (pci_register_driver(&pci_drv) < 0)
		goto destroy_devices;

	return 0;

destroy_devices:
	/*
	 * pci_dev_number already is a complete dev_t, so the device numbers
	 * must be rebuilt from its major and minor parts. device_destroy() is
	 * looked up by number, so numbers without a device are skipped.
	 */
	for (i = 0; i < MAX_DEVICES; i++)
		device_destroy(pci_class, MKDEV(MAJOR(pci_dev_number), MINOR(pci_dev_number) + i));
	class_destroy(pci_class);
del_cdev:
	cdev_del(driver_object);
free_dev_number:
	unregister_chrdev_region(pci_dev_number, MAX_DEVICES);
	return -EIO;
}
/*
 * mod_exit - module unload entry point.
 *
 * Undoes mod_init in reverse order: unregisters the PCI driver, destroys
 * the device nodes and their class, removes the cdev and releases the
 * character-device number range.
 */
static void __exit mod_exit(void)
{
	int i;

	pci_unregister_driver(&pci_drv);

	/*
	 * Destroy the nodes by device number instead of through the stored
	 * pointers. pci_dev_number already is a complete dev_t, so each number
	 * is rebuilt from its major and minor parts.
	 */
	for (i = 0; i < MAX_DEVICES; i++)
		device_destroy(pci_class, MKDEV(MAJOR(pci_dev_number), MINOR(pci_dev_number) + i));

	class_destroy(pci_class);
	cdev_del(driver_object);
	unregister_chrdev_region(pci_dev_number, MAX_DEVICES);
}
// Register mod_init and mod_exit as the module's load and unload entry points.
module_init(mod_init);
module_exit(mod_exit);
错误处理例程可能会更好,但无论如何它们都不会被触发。
我想我找到了问题的原因。我在执行原始 post 的每个步骤时查看了 PCI configuration space。中断工作时的配置space:
# lspci -xxx | grep Xilinx
23:00.0 Memory controller: Xilinx Corporation Device 7024
00: ee 10 24 70 *07* 04 10 00 00 00 80 05 10 00 00 00
. . .
当它损坏时:
# lspci -xxx | grep Xilinx
23:00.0 Memory controller: Xilinx Corporation Device 7024
00: ee 10 24 70 *03* 04 10 00 00 00 80 05 10 00 00 00
. . .
我发现命令寄存器(command register)的值在重新加载内核模块后发生了变化(用 * 标记)。中断正常工作时命令寄存器的值是 0x0407,重新加载模块后变为 0x0403。为什么?我不知道。这可能只是 Xilinx AXI Memory Mapped to PCIe 核心的实现方式。(注:0x0407 与 0x0403 的差别是第 2 位,即 Bus Master Enable;MSI 是由设备发起的内存写操作,需要该位置 1。)
无论如何,您可以使用 setpci(8) 设置 PCI 配置空间中的值。
命令寄存器的期望值是 0407,所以执行:
# setpci -d <vendor_id>:<device_id> command=0407
#read back to check if it worked
# sudo setpci -d <vendor_id>:<device_id> command
0407
之后中断又正常了,我不需要重启。
在内核模块中,您可以使用例如 pci_write_config_byte(...) 将命令寄存器(或任何其他寄存器)设置为所需的值(对于 Bus Master 位,更常规的做法是在 probe 函数中调用 pci_set_master())。可以在此处找到访问配置空间的相应函数:Linux Device Drivers - Accessing the Configuration Space