内核模块重新加载后中断处理程序停止工作 (Xilinx FPGA / PCIe)

Interrupt Handler Stops Working After Kernel Module Reload (Xilinx FPGA / PCIe)

我目前正在使用 Xilinx PCIe IP 核(AXI Memory Mapped to PCIe)为 Xilinx Kintex-7 开发板开发 PCIe 驱动程序。遇到的一个问题是:重新加载内核模块之后,中断处理程序停止工作。详细步骤如下:

  1. 我的机器全新启动
  2. 加载内核模块
  3. 使用 dmesg 监视内核消息
  4. /proc/interrupts 显示预期的中断 ID
  5. 我触发了硬件中断,一切正常;我可以看到中断处理程序正在工作。
  6. rmmod my_module
  7. /proc/interrupts 按预期删除了中断 ID
  8. insmod my_module并触发中断
  9. 现在中断处理程序是静默的,/proc/interrupts 不会增加计数器

重新启动机器之后,一切又恢复正常。由于不需要重新配置 FPGA 就能恢复,我推测是我在内核模块中做错了什么,而不太可能是硬件问题。

我已经尝试使用 /sys/bus/pci/devices/.../reset、/sys/bus/pci/devices/.../remove 和 /sys/bus/pci/rescan,试图让设备回到相当于机器全新启动后的状态,但都没有任何效果。

相关模块代码:

/* PCI vendor/device ID pair this driver binds to (see pci_drv_tbl). */
#define VENDOR_ID 0x10EE
#define DEVICE_ID 0x7024

/* Character-device state, filled in by mod_init(). */
static dev_t pci_dev_number;
static struct cdev *driver_object;
static struct class *pci_class;
static struct device *pci_prc;
static struct device *pci_irq_0;
static struct device *pci_irq_1;

/*
 * MSI state, written by device_init().
 * msi_vec_num starts as the number of requested MSI interrupts;
 * msi_0/msi_1 hold the IRQ numbers and are -1 until assigned.
 */
static int msi_vec_num = 2;
static int msi_0 = -1;
static int msi_1 = -1;

/* Wait queues woken by the interrupt handlers; used for poll and select. */
static DECLARE_WAIT_QUEUE_HEAD(queue_vs0);
static DECLARE_WAIT_QUEUE_HEAD(queue_vs1);

/*
 * pci_isr_0() - interrupt handler registered for the IRQ stored in msi_0.
 * @irq:    IRQ number that fired (only logged).
 * @dev_id: cookie passed to request_irq(); unused here.
 *
 * Logs the event and wakes tasks sleeping on queue_vs0.
 * Return: always IRQ_HANDLED.
 */
static irqreturn_t pci_isr_0(int irq, void *dev_id)
{
  const irqreturn_t status = IRQ_HANDLED;

  printk(KERN_NOTICE "codec IRQ: interrupt handler 0. IRQ: %d\n", irq);
  wake_up_interruptible(&queue_vs0);

  return status;
}

/*
 * pci_isr_1() - interrupt handler registered for the IRQ stored in msi_1.
 * @irq:    IRQ number that fired (only logged).
 * @dev_id: cookie passed to request_irq(); unused here.
 *
 * Logs the event and wakes tasks sleeping on queue_vs1.
 * Return: always IRQ_HANDLED.
 */
static irqreturn_t pci_isr_1(int irq, void *dev_id)
{
  const irqreturn_t status = IRQ_HANDLED;

  printk(KERN_NOTICE "codec IRQ: interrupt handler 1. IRQ: %d\n", irq);
  wake_up_interruptible(&queue_vs1);

  return status;
}

static void* bars[PCIE_BARS] = {0};

static int device_init(struct pci_dev * pdev, const struct pci_device_id * id) {
  int i = 0; // loop var
  if (pci_enable_device(pdev))
    return -EIO;

  // Request memory regions for bar 0 to 2
  for (i = 0; i < PCIE_BARS; i++) {
    if (pci_request_region(pdev, i, "codec_pci") != 0) {
      dev_err( & pdev - > dev, "Bar %d - I/O address conflict for device \"%s\"\n", i, pdev - > dev.kobj.name);
      return -EIO;
    }
  }

  // DEBUG: Check if we are in memory space (which we should) or io space
  if ((pci_resource_flags(pdev, 0) & IORESOURCE_IO)) {
    printk(KERN_NOTICE "codec INIT: in io space\n");
  } else if ((pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
    printk(KERN_NOTICE "codec INIT: in mem_space\n");
  }

  // This request enables MSI_enable in the hardware
  msi_vec_num = pci_alloc_irq_vectors(pdev, 1, msi_vec_num, PCI_IRQ_MSI);

  // msi_N will contain the IRQ number - see /proc/interrupts
  msi_0 = pci_irq_vector(pdev, 0);
  msi_1 = pci_irq_vector(pdev, 1);
  printk(KERN_NOTICE "codec INIT: nvec: %d\n", msi_vec_num);
  printk(KERN_NOTICE "codec INIT: msi_0: %d\n", msi_0);
  printk(KERN_NOTICE "codec INIT: msi_1: %d\n", msi_1);

  if (request_irq(msi_0, pci_isr_0, IRQF_SHARED, "codec_pci", pdev)) {
    dev_err( & pdev - > dev, "codec INIT: IRQ MSI %d not free.\n", msi_0);
    goto cleanup;
  };
  if (request_irq(msi_1, pci_isr_1, IRQF_SHARED, "codec_pci", pdev)) {
    dev_err( & pdev - > dev, "codec INIT: IRQ MSI %d not free.\n", msi_1);
    goto cleanup;
  };

  for (i = 0; i < PCIE_BARS; i++) {
    // Last parameter is the address space/length of each bar. Defined in the PCIe core.
    bars[i] = pci_iomap(pdev, i, pci_resource_len(pdev, i));
    if (bars[i] == NULL) {
      printk(KERN_ERR "codec INIT: bar %d allocation failed\n", i);
      goto cleanup;
    }
    printk(KERN_NOTICE "codec INIT: bar %d pointer: %p\n", i, bars[i]);
  }

  printk(KERN_NOTICE "codec INIT: loaded\n");

  return 0;
cleanup:
    for (i = 0; i < PCIE_BARS; i++) {
      if (bars[i] != NULL)
        pci_iounmap(pdev, bars[i]);
      pci_release_region(pdev, i);
    }
    
  return -EIO;
}

/*
 * device_deinit() - PCI remove callback: tear the device down.
 * @pdev: PCI device being unbound.
 *
 * Releases resources in the reverse order of acquisition: interrupt
 * handlers, MSI vectors, BAR mappings, BAR regions, and finally the device
 * itself. The file-scope bookkeeping (msi_0, msi_1, bars[]) is reset so a
 * later bind never sees stale IRQ numbers or dangling mappings.
 */
static void device_deinit(struct pci_dev *pdev) {
  int i;

  if (msi_0 >= 0) {
    free_irq(msi_0, pdev);
    msi_0 = -1;
  }

  if (msi_1 >= 0) {
    free_irq(msi_1, pdev);
    msi_1 = -1;
  }

  pci_free_irq_vectors(pdev);

  // Unmap while the regions are still owned by this driver
  for (i = 0; i < PCIE_BARS; i++) {
    if (bars[i] != NULL) {
      pci_iounmap(pdev, bars[i]);
      bars[i] = NULL;
    }
  }

  // release bar regions
  for (i = 0; i < PCIE_BARS; i++)
    pci_release_region(pdev, i);

  pci_disable_device(pdev);
}

// File operations not in this snipped
static struct file_operations fops = {
  .owner = THIS_MODULE,
  .open = device_open,
  .read = device_read,
  .write = device_write,
  .poll = device_poll
};

/*
 * Devices handled by this driver: one VENDOR_ID/DEVICE_ID pair with any
 * subsystem IDs. The table ends with an all-zero sentinel entry.
 */
static struct pci_device_id pci_drv_tbl[] = {
  {
    .vendor = VENDOR_ID,
    .device = DEVICE_ID,
    .subvendor = PCI_ANY_ID,
    .subdevice = PCI_ANY_ID,
    .class = 0,
    .class_mask = 0,
    .driver_data = 0,
  },
  { 0 },
};

/* PCI driver descriptor: binds device_init()/device_deinit() to the IDs in pci_drv_tbl. */
static struct pci_driver pci_drv = {
  .name     = "codec_pci",
  .id_table = pci_drv_tbl,
  .probe    = device_init,
  .remove   = device_deinit,
};

/*
 * mod_init() - module entry point.
 *
 * Reserves MAX_DEVICES character-device numbers, registers the cdev, creates
 * the "codec_pci" class with its three device nodes (codec_prc, codec_irq_0,
 * codec_irq_1) and finally registers the PCI driver.
 *
 * Each failure unwinds everything set up before it, in reverse order.
 *
 * Return: 0 on success, a negative errno on failure.
 */
static int __init mod_init(void) {
  unsigned int major;
  unsigned int first_minor;
  int ret;

  ret = alloc_chrdev_region(&pci_dev_number, 0, MAX_DEVICES, "codec_pci");
  if (ret < 0)
    return ret;
  major = MAJOR(pci_dev_number);
  first_minor = MINOR(pci_dev_number);

  driver_object = cdev_alloc();
  if (driver_object == NULL) {
    ret = -ENOMEM;
    goto free_dev_number;
  }
  driver_object->owner = THIS_MODULE;
  driver_object->ops = &fops;
  ret = cdev_add(driver_object, pci_dev_number, MAX_DEVICES);
  if (ret) {
    // The cdev was never added: just drop the reference from cdev_alloc()
    kobject_put(&driver_object->kobj);
    goto free_dev_number;
  }

  pci_class = class_create(THIS_MODULE, "codec_pci");
  if (IS_ERR(pci_class)) {
    pr_err("codec MOD_INIT: no udev support available\n");
    ret = PTR_ERR(pci_class);
    goto del_cdev;
  }

  pci_prc = device_create(pci_class, NULL, MKDEV(major, first_minor + 0), NULL, "%s", "codec_prc");
  if (IS_ERR(pci_prc)) {
    ret = PTR_ERR(pci_prc);
    goto destroy_class;
  }
  pci_irq_0 = device_create(pci_class, NULL, MKDEV(major, first_minor + 1), NULL, "codec_irq_%d", 0);
  if (IS_ERR(pci_irq_0)) {
    ret = PTR_ERR(pci_irq_0);
    goto destroy_prc;
  }
  pci_irq_1 = device_create(pci_class, NULL, MKDEV(major, first_minor + 2), NULL, "codec_irq_%d", 1);
  if (IS_ERR(pci_irq_1)) {
    ret = PTR_ERR(pci_irq_1);
    goto destroy_irq_0;
  }

  ret = pci_register_driver(&pci_drv);
  if (ret < 0)
    goto destroy_irq_1;

  return 0;

  // MKDEV() takes a major and a minor number, not a complete dev_t
destroy_irq_1:
  device_destroy(pci_class, MKDEV(major, first_minor + 2));
destroy_irq_0:
  device_destroy(pci_class, MKDEV(major, first_minor + 1));
destroy_prc:
  device_destroy(pci_class, MKDEV(major, first_minor + 0));
destroy_class:
  class_destroy(pci_class);
del_cdev:
  cdev_del(driver_object);
free_dev_number:
  unregister_chrdev_region(pci_dev_number, MAX_DEVICES);
  return ret;
}

/*
 * mod_exit() - module exit point.
 *
 * Unregisters the PCI driver first so no device stays bound, then removes
 * the three device nodes, the class, the cdev and the reserved device
 * numbers.
 *
 * Each node is removed exactly once through device_destroy(), using the
 * dev_t stored in the device itself. MKDEV() must not be fed a complete
 * dev_t as its major argument, and a node must not be unregistered and
 * destroyed twice.
 */
static void __exit mod_exit(void) {
  pci_unregister_driver(&pci_drv);

  if (!IS_ERR_OR_NULL(pci_prc))
    device_destroy(pci_class, pci_prc->devt);
  if (!IS_ERR_OR_NULL(pci_irq_0))
    device_destroy(pci_class, pci_irq_0->devt);
  if (!IS_ERR_OR_NULL(pci_irq_1))
    device_destroy(pci_class, pci_irq_1->devt);

  class_destroy(pci_class);
  cdev_del(driver_object);
  unregister_chrdev_region(pci_dev_number, MAX_DEVICES);
}

/* Register mod_init()/mod_exit() as the module's load and unload entry points. */
module_init(mod_init);
module_exit(mod_exit);

错误处理代码还有改进的余地,但在上述场景中这些错误路径都没有被触发。

我想我找到了问题的原因。我在执行原帖中每个步骤时都查看了 PCI 配置空间(configuration space)。中断正常工作时的配置空间:

# lspci -xxx | grep Xilinx
23:00.0 Memory controller: Xilinx Corporation Device 7024
00: ee 10 24 70 *07* 04 10 00 00 00 80 05 10 00 00 00
. . .

中断不工作时:

# lspci -xxx | grep Xilinx
23:00.0 Memory controller: Xilinx Corporation Device 7024
00: ee 10 24 70 *03* 04 10 00 00 00 80 05 10 00 00 00
. . .

我发现重新加载内核模块之后,命令寄存器(command register)的值发生了变化(用 * 标记)。中断正常工作时命令寄存器的值为 0x0407,重新加载模块后变为 0x0403。为什么?我不知道,这可能只是 Xilinx AXI Memory Mapped to PCIe IP 核的实现方式所致。(补充说明:两个值相差的 0x0004 位是 Bus Master Enable 位。MSI 是由设备发起的内存写操作,需要该位为 1;pci_disable_device() 会清除该位,而 pci_enable_device() 不会重新设置它。)

无论如何,您可以使用 setpci(8) 设置 PCI 配置空间中的值。

命令寄存器的期望值是 0x0407,因此执行:

# setpci -d <vendor_id>:<device_id> command=0407

#read back to check if it worked
# sudo setpci -d <vendor_id>:<device_id> command
0407

之后中断又正常了,我不需要重启。

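在内核模块中,你可以使用例如 pci_write_config_word(...)(命令寄存器宽 16 位)将命令寄存器(或任何其他寄存器)设置为所需的值。如果只是要重新打开 Bus Master Enable 位(0x0004),更规范的做法是在 probe 函数中 pci_enable_device() 之后调用 pci_set_master(pdev)。访问配置空间的相应函数可以在此处找到:Linux Device Drivers - Accessing the Configuration Space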