Vulkan API 和 C - 查询物理设备属性导致段错误

Vulkan API and C - Query for physical device properties causes segfault

这是主调用函数

/*
 * Main calling function of the reproducer. Resets the global queue-family
 * count, calls populatePhysicalDevice() (not shown) with the addresses of the
 * instance and physicalDevice globals, then calls populateQueueFamilies().
 * physicalDeviceTest() is run after each step to probe the device handle.
 */
void createVulkanContext()
{
  queueFamilyCount = 0;

  populatePhysicalDevice(&instance, &physicalDevice);
  physicalDeviceTest(&physicalDevice); // This one works fine

  // physicalDevice is passed by value here; only the list pointer and the
  // count are passed by address.
  populateQueueFamilies(physicalDevice, &queueFamilyIndicesList, &queueFamilyCount, surface);
  physicalDeviceTest(&physicalDevice); // This one causes segfault
}

这是查询函数

/*
 * Debug helper: prints the address of the given physical-device handle,
 * then queries vkGetPhysicalDeviceProperties() for it and prints a few
 * fields of the result.
 *
 * gPhysicalDevice  pointer to the handle to query; it is dereferenced
 *                  unconditionally, so it must not be NULL.
 *
 * Output, one value per line: handle address, deviceID, deviceName,
 * apiVersion, driverVersion, vendorID.
 */
void physicalDeviceTest(VkPhysicalDevice* gPhysicalDevice)
{
  /* %p requires a void* argument; any other pointer type is undefined. */
  printf("%p\n", (void*)gPhysicalDevice);

  VkPhysicalDeviceProperties pdProp;
  vkGetPhysicalDeviceProperties(*gPhysicalDevice, &pdProp);

  printf("%u\n", pdProp.deviceID);
  printf("%s\n", pdProp.deviceName);
  printf("%u\n", pdProp.apiVersion);
  printf("%u\n", pdProp.driverVersion);
  printf("%u\n", pdProp.vendorID);
}

打印这个

0x55555555a1e8
26958
Unknown AMD GPU
4198513
8388708
4098
0x55555555a1e8

这是gdb backtrace结果

Thread 1 "VulkanApp1090" received signal SIGSEGV, Segmentation fault.
0x00007ffff7f63467 in vkGetPhysicalDeviceProperties () from /usr/lib/libvulkan.so.1
(gdb) backtrace
#0  0x00007ffff7f63467 in vkGetPhysicalDeviceProperties () from /usr/lib/libvulkan.so.1
#1  0x0000555555555602 in physicalDeviceTest (gPhysicalDevice=0x55555555a1e8 <physicalDevice>)

据我所知,这不应该发生,因为 populateQueueFamilies(physicalDevice, &queueFamilyIndicesList ...) 函数并没有真正改变按值传递的 physicalDevice 变量。


完整代码

/*
 * Capability record for one queue family. Each field holds the queue
 * family's index when the family supports that capability, or -1 when it
 * does not (see populateQueueFamilyQueueIndices and populateQueueFamilies).
 */
typedef struct QueueFamilyIndices
{
  int graphicsFamilySupportQueueIndex;      // VK_QUEUE_GRAPHICS_BIT
  int computeFamilySupportQueueIndex;       // VK_QUEUE_COMPUTE_BIT
  int transferFamilySupportQueueIndex;      // VK_QUEUE_TRANSFER_BIT
  int sparsebindingFamilySupportQueueIndex; // VK_QUEUE_SPARSE_BINDING_BIT
  int protectedFamilySupportQueueIndex;     // VK_QUEUE_PROTECTED_BIT
  int presentFamilySupportQueueIndex;       // from vkGetPhysicalDeviceSurfaceSupportKHR

} QueueFamilyIndices;


// Instance handle; its address is passed to populatePhysicalDevice() (not shown).
VkInstance instance;

// physicalDevice is passed by address to populatePhysicalDevice() and is
// presumably filled in there. logicalDevice is not referenced by the code shown.
VkPhysicalDevice physicalDevice;
VkDevice         logicalDevice;

// A single pointer and a count; populateQueueFamilies() receives the address
// of each. selectedQueueFamilyIndex is not referenced by the code shown.
QueueFamilyIndices* queueFamilyIndicesList;
QueueFamilyIndices  selectedQueueFamilyIndex;
uint32_t            queueFamilyCount;

// surface is passed to populateQueueFamilies() for the presentation-support query.
// NOTE(review): no code shown here assigns surface -- confirm it is created first.
VkQueue      graphicsQueue;
VkSurfaceKHR surface;


/*
 * Fills the five queueFlags-derived fields of *gQueueFamilyIndices for one
 * queue family: a field receives gQueueFamilyIndex when the matching bit is
 * set in gQueueFamilyProperties.queueFlags, and -1 otherwise.
 *
 * presentFamilySupportQueueIndex is not written here.
 */
void populateQueueFamilyQueueIndices(VkQueueFamilyProperties gQueueFamilyProperties,
                                     uint32_t                gQueueFamilyIndex,
                                     QueueFamilyIndices*     gQueueFamilyIndices)
{
  /* The record stores indices as int, so convert once up front. */
  const int familyIndex = (int)gQueueFamilyIndex;

  gQueueFamilyIndices->graphicsFamilySupportQueueIndex =
    (gQueueFamilyProperties.queueFlags & VK_QUEUE_GRAPHICS_BIT) ? familyIndex : -1;

  gQueueFamilyIndices->computeFamilySupportQueueIndex =
    (gQueueFamilyProperties.queueFlags & VK_QUEUE_COMPUTE_BIT) ? familyIndex : -1;

  gQueueFamilyIndices->transferFamilySupportQueueIndex =
    (gQueueFamilyProperties.queueFlags & VK_QUEUE_TRANSFER_BIT) ? familyIndex : -1;

  gQueueFamilyIndices->sparsebindingFamilySupportQueueIndex =
    (gQueueFamilyProperties.queueFlags & VK_QUEUE_SPARSE_BINDING_BIT) ? familyIndex : -1;

  gQueueFamilyIndices->protectedFamilySupportQueueIndex =
    (gQueueFamilyProperties.queueFlags & VK_QUEUE_PROTECTED_BIT) ? familyIndex : -1;
}


/*
 * Enumerates the queue families of gPhysicalDevice, builds one
 * QueueFamilyIndices record per family (queue-flag capabilities plus
 * presentation support for `surface`), and reports the family count through
 * gQueueFamilyCount.
 *
 * gPhysicalDevice          device to query (passed by value).
 * gQueueFamilyIndicesList  address of the caller's list pointer.
 * gQueueFamilyCount        receives the number of queue families.
 * surface                  surface used for the presentation-support query.
 *
 * NOTE(review): this is the listing under discussion and it is defective as
 * written -- see the comments on the allocation and the two stores below.
 */
void populateQueueFamilies(VkPhysicalDevice     gPhysicalDevice,
                           QueueFamilyIndices** gQueueFamilyIndicesList,
                           uint32_t*            gQueueFamilyCount,
                           VkSurfaceKHR         surface)
{

  // First call with no output array only retrieves the count.
  uint32_t queueFamilyCount;
  vkGetPhysicalDeviceQueueFamilyProperties(gPhysicalDevice, &queueFamilyCount, VK_NULL_HANDLE);

  // Second call fills a variable-length array sized by that count.
  VkQueueFamilyProperties queueFamilies[queueFamilyCount];
  vkGetPhysicalDeviceQueueFamilyProperties(gPhysicalDevice, &queueFamilyCount, queueFamilies);

  VkBool32 presentFamilySupported;

  // NOTE(review): sizes the block by sizeof(QueueFamilyIndices*), a pointer,
  // not by sizeof(QueueFamilyIndices), the element type.
  *gQueueFamilyIndicesList = malloc(sizeof(QueueFamilyIndices*) * queueFamilyCount);

  for (uint32_t i = 0; i < queueFamilyCount; ++i)
    {
      QueueFamilyIndices gQueueFamilyIndices;

      populateQueueFamilyQueueIndices(queueFamilies[i], i, &gQueueFamilyIndices);

      presentFamilySupported = false;

      vkGetPhysicalDeviceSurfaceSupportKHR(gPhysicalDevice, i, surface, &presentFamilySupported);
      gQueueFamilyIndices.presentFamilySupportQueueIndex = presentFamilySupported ? i : -1;

      // NOTE(review): [] binds tighter than unary *, so both lines index the
      // QueueFamilyIndices** parameter itself. The caller passes the address
      // of a single pointer, so for i >= 1 this stores outside that object
      // (undefined behaviour; it may clobber neighbouring globals, which
      // would be consistent with the reported crash). For i == 0 it replaces
      // the block allocated above, which is then leaked.
      gQueueFamilyIndicesList[i]  = malloc(sizeof(QueueFamilyIndices));
      *gQueueFamilyIndicesList[i] = gQueueFamilyIndices;
    }

  *gQueueFamilyCount = queueFamilyCount;
}

/*
 * Debug helper: prints the address of the given physical-device handle,
 * then queries vkGetPhysicalDeviceProperties() for it and prints a few
 * fields of the result.
 *
 * gPhysicalDevice  pointer to the handle to query; it is dereferenced
 *                  unconditionally, so it must not be NULL.
 *
 * Output, one value per line: handle address, deviceID, deviceName,
 * apiVersion, driverVersion, vendorID.
 */
void physicalDeviceTest(VkPhysicalDevice* gPhysicalDevice)
{
  /* %p requires a void* argument; any other pointer type is undefined. */
  printf("%p\n", (void*)gPhysicalDevice);

  VkPhysicalDeviceProperties pdProp;
  vkGetPhysicalDeviceProperties(*gPhysicalDevice, &pdProp);

  printf("%u\n", pdProp.deviceID);
  printf("%s\n", pdProp.deviceName);
  printf("%u\n", pdProp.apiVersion);
  printf("%u\n", pdProp.driverVersion);
  printf("%u\n", pdProp.vendorID);
}

/*
 * Main calling function of the reproducer. Resets the global queue-family
 * count, calls populatePhysicalDevice() (not shown) with the addresses of the
 * instance and physicalDevice globals, then calls populateQueueFamilies().
 * physicalDeviceTest() is run after each step to probe the device handle.
 */
void createVulkanContext()
{
  queueFamilyCount = 0;

  populatePhysicalDevice(&instance, &physicalDevice);
  physicalDeviceTest(&physicalDevice); // This one works fine

  // physicalDevice is passed by value here; only the list pointer and the
  // count are passed by address.
  populateQueueFamilies(physicalDevice, &queueFamilyIndicesList, &queueFamilyCount, surface);
  physicalDeviceTest(&physicalDevice); // This one causes segfault
}

进一步诊断后,我怀疑问题出在下面这两行

      gQueueFamilyIndicesList[i]  = malloc(sizeof(QueueFamilyIndices));
      *gQueueFamilyIndicesList[i] = gQueueFamilyIndices;

因为将它们注释掉可以修复段错误。


TL;DR

再次调用相同的函数 (physicalDeviceTest) 导致段错误

/*
 * Main calling function of the reproducer. Resets the global queue-family
 * count, calls populatePhysicalDevice() (not shown) with the addresses of the
 * instance and physicalDevice globals, then calls populateQueueFamilies().
 * physicalDeviceTest() is run after each step to probe the device handle.
 */
void createVulkanContext()
{
  queueFamilyCount = 0;

  populatePhysicalDevice(&instance, &physicalDevice);
  physicalDeviceTest(&physicalDevice); // This one works fine

  // physicalDevice is passed by value here; only the list pointer and the
  // count are passed by address.
  populateQueueFamilies(physicalDevice, &queueFamilyIndicesList, &queueFamilyCount, surface);
  physicalDeviceTest(&physicalDevice); // This one causes segfault
}

可能是因为这些调用

      gQueueFamilyIndicesList[i]  = malloc(sizeof(QueueFamilyIndices));
      *gQueueFamilyIndicesList[i] = gQueueFamilyIndices;

在函数 populateQueueFamilies 中,虽然我不确定为什么或如何。

想通了。

*gQueueFamilyIndicesList = malloc(sizeof(QueueFamilyIndices) * queueFamilyCount);

现在正确分配内存并指向列表中的第一个内存块。

像这样先解引用再按下标赋值

(*gQueueFamilyIndicesList)[i] = gQueueFamilyIndices;

现在可以正常工作了。

*gQueueFamilyIndicesList 外面的括号很重要,因为下标运算符 [] 的优先级高于解引用运算符 *

*gQueueFamilyIndicesList[i] = *(*(gQueueFamilyIndicesList + i))

(*gQueueFamilyIndicesList)[i] = *((*gQueueFamilyIndicesList) + i)