生成的 SPIR-V 代码有什么问题以及如何验证它?

What is the problem with generated SPIR-V code and how to verify it?

我有一些生成的 SPIR-V 代码,我想将其与 Vulkan API 一起使用。但在尝试使用 vkCreateComputePipelines 创建管线时,我得到了如下异常:Exception thrown at 0x00007FFB68D933CB (nvoglv64.dll) in vulkanCompute.exe: 0xC0000005: Access violation reading location 0x0000000000000008.

API 调用应该没问题,因为相同的代码适用于使用 glslangValidator 编译的着色器。因此我假设生成的 SPIR-V 代码一定是格式错误的。

我用 Khronos 的验证工具(validator tool)检查了 SPIR-V 代码:运行 spirv-val --target-env vulkan1.1 mainV.spv 后,它没有报告任何错误就退出了。不过我也知道这个工具目前仍不完整。

我还尝试使用 Radeon GPU 分析器(Radeon GPU Analyzer)来编译我的 SPIR-V 代码,该代码也可以在 shader playground 上在线获得。此工具抛出错误 Error: Error: internal error: Bil::BilInstructionConvert::Create(60) Code Not Tested! 这条信息并没有多大帮助,但进一步支持了代码格式错误的假设。

不幸的是,SPIR-V 代码太长,无法贴在这里,但可以在 shader playground 的链接中找到。

有谁知道我的设置有什么问题,或者知道如何以更好的方式验证我的 SPIR-V 代码,而无需手动检查全部 700 行代码?

我认为问题不在这里,但无论如何还是附上 C++ 主机端代码:

#include "vulkan/vulkan.hpp"

#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <stdexcept>
#include <vector>
// Terminates the process when `result` is not VK_SUCCESS.
// On failure it prints the line number and file name of the check to stderr
// and calls exit(-1). The body is wrapped in do { } while (0) so the macro
// expands to exactly one statement and can be used safely in an unbraced
// if/else (the trailing semicolon is supplied by the caller).
#define BAIL_ON_BAD_RESULT(result)                               \
  do                                                             \
  {                                                              \
    if (VK_SUCCESS != (result))                                  \
    {                                                            \
      fprintf(stderr, "Failure at %d %s\n", __LINE__, __FILE__); \
      exit(-1);                                                  \
    }                                                            \
  } while (0)

// Selects a queue family of `physicalDevice` that can run compute work.
//
// The transfer and sparse-binding bits are masked out of each family's flags
// before testing. A family that offers compute but not graphics is preferred;
// if there is none, the first family offering compute at all is taken.
//
// On success the chosen index is written to `queueFamilyIndex` and VK_SUCCESS
// is returned. If no family offers compute, `queueFamilyIndex` is left
// untouched and VK_ERROR_INITIALIZATION_FAILED is returned.
VkResult vkGetBestComputeQueueNPH(vk::PhysicalDevice &physicalDevice, uint32_t &queueFamilyIndex)
{
  const auto families = physicalDevice.getQueueFamilyProperties();
  const vk::QueueFlags ignoredBits = vk::QueueFlagBits::eTransfer | vk::QueueFlagBits::eSparseBinding;

  // Pass 1: compute-capable families that do not also expose graphics.
  for (uint32_t family = 0; family < families.size(); ++family)
  {
    const vk::QueueFlags relevant = ~ignoredBits & families[family].queueFlags;
    if (vk::QueueFlagBits::eGraphics & relevant)
    {
      continue;
    }
    if (vk::QueueFlagBits::eCompute & relevant)
    {
      queueFamilyIndex = family;
      return VK_SUCCESS;
    }
  }

  // Pass 2: any compute-capable family.
  for (uint32_t family = 0; family < families.size(); ++family)
  {
    const vk::QueueFlags relevant = ~ignoredBits & families[family].queueFlags;
    if (vk::QueueFlagBits::eCompute & relevant)
    {
      queueFamilyIndex = family;
      return VK_SUCCESS;
    }
  }

  return VK_ERROR_INITIALIZATION_FAILED;
}

int main(int argc, const char *const argv[])
{
  (void)argc;
  (void)argv;

  try
  {

    // initialize the vk::ApplicationInfo structure
    vk::ApplicationInfo applicationInfo("VecAdd", 1, "Vulkan.hpp", 1, VK_API_VERSION_1_1);
    
    // initialize the vk::InstanceCreateInfo
    std::vector<char *> layers = {
      "VK_LAYER_LUNARG_api_dump",
      "VK_LAYER_KHRONOS_validation"
    };
    vk::InstanceCreateInfo instanceCreateInfo({}, &applicationInfo, static_cast<uint32_t>(layers.size()), layers.data());

    // create a UniqueInstance
    vk::UniqueInstance instance = vk::createInstanceUnique(instanceCreateInfo);

    auto physicalDevices = instance->enumeratePhysicalDevices();

    for (auto &physicalDevice : physicalDevices)
    {
    
      auto props = physicalDevice.getProperties();

      // get the QueueFamilyProperties of the first PhysicalDevice
      std::vector<vk::QueueFamilyProperties> queueFamilyProperties = physicalDevice.getQueueFamilyProperties();
      uint32_t computeQueueFamilyIndex = 0;

      // get the best index into queueFamiliyProperties which supports compute and stuff
      BAIL_ON_BAD_RESULT(vkGetBestComputeQueueNPH(physicalDevice, computeQueueFamilyIndex));

      std::vector<char *>extensions = {"VK_EXT_external_memory_host", "VK_KHR_shader_float16_int8"};
      // create a UniqueDevice
      float queuePriority = 0.0f;

      vk::DeviceQueueCreateInfo deviceQueueCreateInfo(vk::DeviceQueueCreateFlags(), static_cast<uint32_t>(computeQueueFamilyIndex), 1, &queuePriority);
       vk::StructureChain<vk::DeviceCreateInfo, vk::PhysicalDeviceFeatures2, vk::PhysicalDeviceShaderFloat16Int8Features> createDeviceInfo = {
        vk::DeviceCreateInfo(vk::DeviceCreateFlags(), 1, &deviceQueueCreateInfo, 0, nullptr, static_cast<uint32_t>(extensions.size()), extensions.data()),
        vk::PhysicalDeviceFeatures2(),
        vk::PhysicalDeviceShaderFloat16Int8Features()
      };
      createDeviceInfo.get<vk::PhysicalDeviceFeatures2>().features.setShaderInt64(true);
      createDeviceInfo.get<vk::PhysicalDeviceShaderFloat16Int8Features>().setShaderInt8(true);
      vk::UniqueDevice device = physicalDevice.createDeviceUnique(createDeviceInfo.get<vk::DeviceCreateInfo>());

      auto memoryProperties2 = physicalDevice.getMemoryProperties2();

      vk::PhysicalDeviceMemoryProperties const &memoryProperties = memoryProperties2.memoryProperties;

      const int32_t bufferLength = 16384;

      const uint32_t bufferSize = sizeof(int32_t) * bufferLength;

      // we are going to need two buffers from this one memory
      const vk::DeviceSize memorySize = bufferSize * 3;

      // set memoryTypeIndex to an invalid entry in the properties.memoryTypes array
      uint32_t memoryTypeIndex = VK_MAX_MEMORY_TYPES;

      for (uint32_t k = 0; k < memoryProperties.memoryTypeCount; k++)
      {
        if ((vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent) & memoryProperties.memoryTypes[k].propertyFlags &&
            (memorySize < memoryProperties.memoryHeaps[memoryProperties.memoryTypes[k].heapIndex].size))
        {
          memoryTypeIndex = k;
          std::cout << "found memory " << memoryTypeIndex + 1 << " out of " << memoryProperties.memoryTypeCount << std::endl;
          break;
        }
      }

      BAIL_ON_BAD_RESULT(memoryTypeIndex == VK_MAX_MEMORY_TYPES ? VK_ERROR_OUT_OF_HOST_MEMORY : VK_SUCCESS);

      auto memory = device->allocateMemoryUnique(vk::MemoryAllocateInfo(memorySize, memoryTypeIndex));
      auto in_buffer = device->createBufferUnique(vk::BufferCreateInfo(vk::BufferCreateFlags(), bufferSize, vk::BufferUsageFlagBits::eStorageBuffer, vk::SharingMode::eExclusive));
      device->bindBufferMemory(in_buffer.get(), memory.get(), 0);
      // create a DescriptorSetLayout
      std::vector<vk::DescriptorSetLayoutBinding> descriptorSetLayoutBinding{
          {0, vk::DescriptorType::eStorageBuffer, 1, vk::ShaderStageFlagBits::eCompute}};
      vk::UniqueDescriptorSetLayout descriptorSetLayout = device->createDescriptorSetLayoutUnique(vk::DescriptorSetLayoutCreateInfo(vk::DescriptorSetLayoutCreateFlags(), static_cast<uint32_t>(descriptorSetLayoutBinding.size()), descriptorSetLayoutBinding.data()));

      std::cout << "Memory bound" << std::endl;

      std::ifstream myfile;
      myfile.open("shaders/MainV.spv", std::ios::ate | std::ios::binary);

      if (!myfile.is_open())
      {
        std::cout << "File not found" << std::endl;
        return EXIT_FAILURE;
      }

      auto size = myfile.tellg();
      std::vector<unsigned int> shader_spv(size / sizeof(unsigned int));
      myfile.seekg(0);
      myfile.read(reinterpret_cast<char *>(shader_spv.data()), size);
      myfile.close();

      std::cout << "Shader size: " << shader_spv.size() << std::endl;

      auto shaderModule = device->createShaderModuleUnique(vk::ShaderModuleCreateInfo(vk::ShaderModuleCreateFlags(), shader_spv.size() * sizeof(unsigned int), shader_spv.data()));

      // create a PipelineLayout using that DescriptorSetLayout
      vk::UniquePipelineLayout pipelineLayout = device->createPipelineLayoutUnique(vk::PipelineLayoutCreateInfo(vk::PipelineLayoutCreateFlags(), 1, &descriptorSetLayout.get()));

      vk::ComputePipelineCreateInfo computePipelineInfo(
          vk::PipelineCreateFlags(),
          vk::PipelineShaderStageCreateInfo(
              vk::PipelineShaderStageCreateFlags(),
              vk::ShaderStageFlagBits::eCompute,
              shaderModule.get(),
              "_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE6VecAdd"),
          pipelineLayout.get());

      auto pipeline = device->createComputePipelineUnique(nullptr, computePipelineInfo);

      auto descriptorPoolSize = vk::DescriptorPoolSize(vk::DescriptorType::eStorageBuffer, 2);
      auto descriptorPool = device->createDescriptorPool(vk::DescriptorPoolCreateInfo(vk::DescriptorPoolCreateFlags(), 1, 1, &descriptorPoolSize));

      auto commandPool = device->createCommandPoolUnique(vk::CommandPoolCreateInfo(vk::CommandPoolCreateFlags(), computeQueueFamilyIndex));

      auto commandBuffer = std::move(device->allocateCommandBuffersUnique(vk::CommandBufferAllocateInfo(commandPool.get(), vk::CommandBufferLevel::ePrimary, 1)).front());

      commandBuffer->begin(vk::CommandBufferBeginInfo(vk::CommandBufferUsageFlags(vk::CommandBufferUsageFlagBits::eOneTimeSubmit)));

      commandBuffer->bindPipeline(vk::PipelineBindPoint::eCompute, pipeline.get());
    
      commandBuffer->dispatch(bufferSize / sizeof(int32_t), 1, 1);

      commandBuffer->end();

      auto queue = device->getQueue(computeQueueFamilyIndex, 0);

      vk::SubmitInfo submitInfo(0, nullptr, nullptr, 1, &commandBuffer.get(), 0, nullptr);
      queue.submit(1, &submitInfo, vk::Fence());

      queue.waitIdle();

      printf("all done\nWoohooo!!!\n\n");
    }
  }
  catch (vk::SystemError &err)
  {
    std::cout << "vk::SystemError: " << err.what() << std::endl;
    exit(-1);
  }
  catch (std::runtime_error &err)
  {
    std::cout << "std::runtime_error: " << err.what() << std::endl;
    exit(-1);
  }
  catch (...)
  {
    std::cout << "unknown error\n";
    exit(-1);
  }

  return EXIT_SUCCESS;
}

在逐行检查后,我发现问题出在使用了指向指针的指针。对我来说,从规范中仍然看不出这是不允许的,但它不适用于逻辑指针是可以理解的。不过这种行为仍然很奇怪:验证器没有发现这个问题,而编译 SPIR-V 代码时会直接崩溃,而不是给出明确的错误消息。所以最后还是着色器(Shader)代码出了问题。