GPU 驱动程序中的 Vulkan 纹理映射不准确?

Vulkan Texture Mapping inaccuracies in GPU driver?

过去一周我一直被这个问题困扰,因为我始终弄不清楚原因。我正在使用 Vulkan 开发一款游戏,但在搭建好渲染器之后,渲染纹理的纹理映射出现了奇怪的偏差:画面被缩小到屏幕的四分之一,这让我无法理解。不仅如此,我把 Nvidia 驱动程序更新到 388.0 之后,问题依然存在,纹理的表现也完全一样:这是在 Nvidia GTX 870M(驱动 v388.0)上最终图像的显示结果

另请注意,上面看到的屏幕纹理被 "scaled"(缩放)到了屏幕的右下四分之一(就好像它被调整成了 width/2 x height/2),这是不正确的...

渲染器的实现依次包含一个前向渲染通道(forward pass)、一个 HDR 通道,以及最终输出到交换链图像的通道。前向通道和 HDR 通道各自使用自己的命令缓冲区提交到图形队列,并通过信号量进行同步。

  // Per-frame submission: the offscreen (forward) pass, then the HDR pass, then
  // the swapchain command buffer, chained to each other with semaphores.

  // Offscreen pass: waits on the swapchain's image-available semaphore at the
  // color-attachment-output stage and signals mOffscreen.semaphore when done.
  VkCommandBuffer offscreenCmd = mOffscreen.cmdBuffer->Handle();
  VkSemaphore waitSemas[] = { mRhi->SwapchainObject()->ImageAvailableSemaphore() };
  VkSemaphore signalSemas[] = { mOffscreen.semaphore->Handle() };
  VkPipelineStageFlags waitFlags[] = { VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT };

  VkSubmitInfo offscreenSI = {};
  offscreenSI.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
  offscreenSI.pCommandBuffers = &offscreenCmd;
  offscreenSI.commandBufferCount = 1;
  offscreenSI.signalSemaphoreCount = 1;
  offscreenSI.pSignalSemaphores = signalSemas;
  offscreenSI.waitSemaphoreCount = 1;
  offscreenSI.pWaitSemaphores = waitSemas;
  offscreenSI.pWaitDstStageMask = waitFlags;

  // HDR pass: starts as a copy of the offscreen submit info, so sType, the
  // counts and pWaitDstStageMask are inherited; only the command buffer and the
  // wait/signal semaphore pointers are replaced. It waits on
  // mOffscreen.semaphore and signals mHDR.semaphore.
  VkSubmitInfo hdrSI = offscreenSI;
  VkSemaphore hdrWaits[] = { mOffscreen.semaphore->Handle() };
  VkSemaphore hdrSignal[] = { mHDR.semaphore->Handle() };
  VkCommandBuffer hdrCmd = mHDR.cmdBuffer->Handle();
  hdrSI.pCommandBuffers = &hdrCmd;
  hdrSI.pSignalSemaphores = hdrSignal;
  hdrSI.pWaitSemaphores = hdrWaits;

  // Update materials before rendering the frame.
  UpdateMaterials();

  // begin frame. This is where we start our render process per frame.
  BeginFrame();
  // Busy-wait with an empty body: spins on the CPU while the offscreen command
  // buffer is still recording or mRhi->CmdBuffersComplete() returns false.
  // NOTE: the indented statements below are NOT part of this loop.
  while (mOffscreen.cmdBuffer->Recording() || !mRhi->CmdBuffersComplete()) {}

    // Offscreen PBR Forward Rendering Pass.
    mRhi->GraphicsSubmit(offscreenSI);

    // High Dynamic Range and Gamma Pass.
    mRhi->GraphicsSubmit(hdrSI);

    // Before calling this cmd buffer, we want to submit our offscreen buffer first, then
    // send our signal to our swapchain cmd buffers.
    // The swapchain command buffer is submitted with mHDR.semaphore as its
    // single wait semaphore.
    VkSemaphore waitSemaphores[] = { mHDR.semaphore->Handle() };
    mRhi->SubmitCurrSwapchainCmdBuffer(1, waitSemaphores);

    // Render the Overlay.
    RenderOverlay();

  EndFrame();

更有趣的是,当我在带有第 6 代 GPU 且支持 Vulkan 的 Intel Kaby Lake CPU 上运行相同的代码时,输出图像完全正确,与预期一致!

所以我不确定这是否是驱动程序错误:看看我是如何实现渲染纹理的:

void Renderer::SetUpRenderTextures()
{
  Texture* pbrColor = mRhi->CreateTexture();
  Texture* pbrDepth = mRhi->CreateTexture();
  Sampler* pbrSampler = mRhi->CreateSampler();
  Texture* hdrTexture = mRhi->CreateTexture();
  Sampler* hdrSampler = mRhi->CreateSampler();

  gResources().RegisterSampler("HDRGammaSampler", hdrSampler);
  gResources().RegisterRenderTexture("HDRGammaTexture", hdrTexture);
  gResources().RegisterRenderTexture("PBRColor", pbrColor);
  gResources().RegisterRenderTexture("PBRDepth", pbrDepth);
  gResources().RegisterSampler("PBRSampler", pbrSampler);

  VkImageCreateInfo cImageInfo = { };
  VkImageViewCreateInfo cViewInfo = { };

  cImageInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
  cImageInfo.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
  cImageInfo.imageType = VK_IMAGE_TYPE_2D;
  cImageInfo.format = VK_FORMAT_R16G16B16A16_SFLOAT;
  cImageInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
  cImageInfo.mipLevels = 1;
  cImageInfo.extent.depth = 1;
  cImageInfo.arrayLayers = 1;
  cImageInfo.extent.width = mWindowHandle->Width();
  cImageInfo.extent.height = mWindowHandle->Height();
  cImageInfo.samples = VK_SAMPLE_COUNT_1_BIT;
  cImageInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
  cImageInfo.tiling = VK_IMAGE_TILING_OPTIMAL;

  cViewInfo.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; 
  cViewInfo.format = VK_FORMAT_R16G16B16A16_SFLOAT;
  cViewInfo.image = nullptr; // No need to set the image, texture->Initialize() handles this for us.
  cViewInfo.viewType = VK_IMAGE_VIEW_TYPE_2D;
  cViewInfo.subresourceRange = { };
  cViewInfo.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
  cViewInfo.subresourceRange.baseArrayLayer = 0;
  cViewInfo.subresourceRange.baseMipLevel = 0;
  cViewInfo.subresourceRange.layerCount = 1;
  cViewInfo.subresourceRange.levelCount = 1;

  pbrColor->Initialize(cImageInfo, cViewInfo);
  // Using the same info, only we are chaning the format to rgba8 unorm attachments
  cImageInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
  cViewInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
  hdrTexture->Initialize(cImageInfo, cViewInfo);

  cImageInfo.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
  cImageInfo.format = VK_FORMAT_D24_UNORM_S8_UINT;

  cViewInfo.format = VK_FORMAT_D24_UNORM_S8_UINT;
  cViewInfo.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;

  pbrDepth->Initialize(cImageInfo, cViewInfo);

  VkSamplerCreateInfo samplerCI = { };
  samplerCI.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO;
  samplerCI.magFilter = VK_FILTER_LINEAR;
  samplerCI.minFilter = VK_FILTER_LINEAR;
  samplerCI.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR;
  samplerCI.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
  samplerCI.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
  samplerCI.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
  samplerCI.compareEnable = VK_FALSE;
  samplerCI.mipLodBias = 0.0f;
  samplerCI.maxAnisotropy = 16.0f;
  samplerCI.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE;
  samplerCI.maxLod = 1.0f;
  samplerCI.minLod = 0.0f;
  samplerCI.unnormalizedCoordinates = VK_FALSE;

  pbrSampler->Initialize(samplerCI);
  hdrSampler->Initialize(samplerCI);

  Sampler* defaultSampler = mRhi->CreateSampler();
  defaultSampler->Initialize(samplerCI);
  gResources().RegisterSampler("DefaultSampler", defaultSampler);

  VkImageCreateInfo dImageInfo = {};
  VkImageViewCreateInfo dViewInfo = {};

  dImageInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
  dImageInfo.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
  dImageInfo.imageType = VK_IMAGE_TYPE_2D;
  dImageInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
  dImageInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
  dImageInfo.mipLevels = 1;
  dImageInfo.extent.depth = 1;
  dImageInfo.arrayLayers = 1;
  dImageInfo.extent.width = mWindowHandle->Width();
  dImageInfo.extent.height = mWindowHandle->Height();
  dImageInfo.samples = VK_SAMPLE_COUNT_1_BIT;
  dImageInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
  dImageInfo.tiling = VK_IMAGE_TILING_OPTIMAL;

  dViewInfo.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
  dViewInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
  dViewInfo.image = nullptr; // No need to set the image, texture handles this for us.
  dViewInfo.viewType = VK_IMAGE_VIEW_TYPE_2D;
  dViewInfo.subresourceRange = {};
  dViewInfo.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
  dViewInfo.subresourceRange.baseArrayLayer = 0;
  dViewInfo.subresourceRange.baseMipLevel = 0;
  dViewInfo.subresourceRange.layerCount = 1;
  dViewInfo.subresourceRange.levelCount = 1;

  Texture* defaultTexture = mRhi->CreateTexture();

  defaultTexture->Initialize(dImageInfo, dViewInfo);
  gResources().RegisterRenderTexture("DefaultTexture", defaultTexture);
}

和帧缓冲区:

void Renderer::SetUpFrameBuffers()
{
  Texture* pbrColor = gResources().GetRenderTexture("PBRColor");
  Texture* pbrDepth = gResources().GetRenderTexture("PBRDepth");

  FrameBuffer* pbrFrameBuffer = mRhi->CreateFrameBuffer();
  gResources().RegisterFrameBuffer("PBRFrameBuffer", pbrFrameBuffer);

  FrameBuffer* hdrFrameBuffer = mRhi->CreateFrameBuffer();
  gResources().RegisterFrameBuffer("HDRGammaFrameBuffer", hdrFrameBuffer);


  VkAttachmentDescription attachmentDescriptions[2];
  attachmentDescriptions[0].format = VK_FORMAT_R16G16B16A16_SFLOAT;
  attachmentDescriptions[0].initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
  attachmentDescriptions[0].finalLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
  attachmentDescriptions[0].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
  attachmentDescriptions[0].storeOp = VK_ATTACHMENT_STORE_OP_STORE;
  attachmentDescriptions[0].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
  attachmentDescriptions[0].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
  attachmentDescriptions[0].samples = VK_SAMPLE_COUNT_1_BIT;
  attachmentDescriptions[0].flags = 0;

  attachmentDescriptions[1].format = VK_FORMAT_D24_UNORM_S8_UINT;
  attachmentDescriptions[1].initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
  attachmentDescriptions[1].finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
  attachmentDescriptions[1].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
  attachmentDescriptions[1].storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
  attachmentDescriptions[1].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
  attachmentDescriptions[1].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
  attachmentDescriptions[1].samples = VK_SAMPLE_COUNT_1_BIT;
  attachmentDescriptions[1].flags = 0;   

  VkSubpassDependency dependencies[2];
  dependencies[0].srcSubpass = VK_SUBPASS_EXTERNAL;
  dependencies[0].dstSubpass = 0;
  dependencies[0].srcStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
  dependencies[0].dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
  dependencies[0].srcAccessMask = VK_ACCESS_MEMORY_READ_BIT;
  dependencies[0].dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
  dependencies[0].dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT;  

  dependencies[1].srcSubpass = 0;
  dependencies[1].dstSubpass = VK_SUBPASS_EXTERNAL;
  dependencies[1].srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
  dependencies[1].dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
  dependencies[1].srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
  dependencies[1].dstAccessMask = VK_ACCESS_MEMORY_READ_BIT;
  dependencies[1].dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT;

  VkAttachmentReference attachmentColorRef = { 0, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL };
  VkAttachmentReference attachmentDepthRef = { 1, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL };

  VkSubpassDescription subpass = { };
  subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
  subpass.colorAttachmentCount = 1;
  subpass.pColorAttachments = &attachmentColorRef;
  subpass.pDepthStencilAttachment = &attachmentDepthRef;

  VkRenderPassCreateInfo renderpassCI = { };
  renderpassCI.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO;
  renderpassCI.attachmentCount = 2;
  renderpassCI.pAttachments = attachmentDescriptions;
  renderpassCI.subpassCount = 1;
  renderpassCI.pSubpasses = &subpass;
  renderpassCI.dependencyCount = 2;
  renderpassCI.pDependencies = dependencies;


  VkImageView attachments[2];
  attachments[0] = pbrColor->View();
  attachments[1] = pbrDepth->View();

  VkFramebufferCreateInfo framebufferCI = {};
  framebufferCI.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO;
  framebufferCI.height = mWindowHandle->Height();
  framebufferCI.width = mWindowHandle->Width();
  framebufferCI.renderPass = nullptr; // The finalize call handles this for us.
  framebufferCI.layers = 1;
  framebufferCI.attachmentCount = 2;
  framebufferCI.pAttachments = attachments;

  pbrFrameBuffer->Finalize(framebufferCI, renderpassCI);

  // No need to render any depth, as we are only writing on a 2d surface.
  subpass.pDepthStencilAttachment = nullptr;
  attachments[0] = gResources().GetRenderTexture("HDRGammaTexture")->View();
  attachments[1] = nullptr;
  framebufferCI.attachmentCount = 1;
  attachmentDescriptions[0].format = VK_FORMAT_R8G8B8A8_UNORM;
  renderpassCI.attachmentCount = 1;

  hdrFrameBuffer->Finalize(framebufferCI, renderpassCI);
}

最后,看看纹理是如何初始化的:

// Creates the Vulkan sampler described by `info` using mOwner and stores the
// resulting handle in mSampler. A failure is only logged; nothing is returned
// to the caller.
void Sampler::Initialize(VkSamplerCreateInfo& info)
{
  const VkResult status = vkCreateSampler(mOwner, &info, nullptr, &mSampler);
  if (status == VK_SUCCESS) {
    return;
  }

  R_DEBUG("ERROR: Sampler failed to initialize!\n");
}


// Destroys the sampler if one is currently held and resets the handle, so
// calling this more than once is harmless.
void Sampler::CleanUp()
{
  if (!mSampler) {
    return; // Nothing to release.
  }

  vkDestroySampler(mOwner, mSampler, nullptr);
  mSampler = VK_NULL_HANDLE;
}


void Texture::Initialize(const VkImageCreateInfo& imageInfo, 
  VkImageViewCreateInfo& viewInfo, b8 stream) // Ignore "stream" as it doesnt do anything yet...
{
  if (vkCreateImage(mOwner, &imageInfo, nullptr, &mImage) != VK_SUCCESS) {
    R_DEBUG("ERROR: Failed to create image!\n");
    return;
  }

  VkMemoryRequirements memoryRequirements;
  vkGetImageMemoryRequirements(mOwner, mImage, &memoryRequirements);
  VkMemoryAllocateInfo allocInfo = { };
  allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
  allocInfo.allocationSize = memoryRequirements.size;
  allocInfo.memoryTypeIndex = VulkanRHI::gPhysicalDevice.FindMemoryType(memoryRequirements.memoryTypeBits, 
    VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);

  if (vkAllocateMemory(mOwner, &allocInfo, nullptr, &mMemory) != VK_SUCCESS) {
    R_DEBUG("ERROR: Failed to allocate host memory for image!\n");
    return;
  }

  if (vkBindImageMemory(mOwner, mImage, mMemory, 0) != VK_SUCCESS) {
    R_DEBUG("ERROR: Failed to bind memory to image!\n");
    return;
  }

  viewInfo.image = mImage;
  if (vkCreateImageView(mOwner, &viewInfo, nullptr, &mView) != VK_SUCCESS) {
    R_DEBUG("ERROR: Failed to create image view!\n");
  }
}


// Releases the view, the image and the backing memory, in that order, and
// resets each handle so a second call is a no-op.
//
// The view is destroyed first because it references the image, and the memory
// is freed last because the image is bound to it.
void Texture::CleanUp()
{
  if (mView) {
    vkDestroyImageView(mOwner, mView, nullptr);
    mView = VK_NULL_HANDLE;
  }

  if (mImage) {
    vkDestroyImage(mOwner, mImage, nullptr);
    mImage = VK_NULL_HANDLE;
  }

  if (mMemory) {
    vkFreeMemory(mOwner, mMemory, nullptr);
    mMemory = VK_NULL_HANDLE;
  }
}


// Copies the pixels of `image` into this texture through a temporary
// host-visible staging buffer and leaves the texture in
// SHADER_READ_ONLY_OPTIMAL layout.
//
// rhi:   used for the graphics command pool, queue submission and the idle wait.
// image: source pixels; the size is computed as width * height * 4 bytes.
//
// The call blocks: it submits a one-time command buffer and waits for the
// graphics queue to go idle before freeing the temporary resources. If the
// staging buffer cannot be mapped, the error is logged, the staging buffer is
// released and nothing is uploaded.
void Texture::Upload(VulkanRHI* rhi, Recluse::Image const& image)
{
  // Widen before multiplying so the byte count is computed in VkDeviceSize.
  const VkDeviceSize imageSize =
    static_cast<VkDeviceSize>(image.Width()) * static_cast<VkDeviceSize>(image.Height()) * 4;

  Buffer stagingBuffer;
  stagingBuffer.SetOwner(mOwner);

  VkBufferCreateInfo stagingCI = { };
  stagingCI.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
  stagingCI.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
  stagingCI.size = imageSize;
  stagingCI.sharingMode = VK_SHARING_MODE_EXCLUSIVE;

  stagingBuffer.Initialize(stagingCI, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);

  // Do not write through the mapping unless the map call succeeded.
  if (stagingBuffer.Map() != VK_SUCCESS) {
    R_DEBUG("ERROR: Failed to map staging buffer for texture upload!\n");
    stagingBuffer.CleanUp();
    return;
  }
  memcpy(stagingBuffer.Mapped(), image.Data(), static_cast<size_t>(imageSize));
  stagingBuffer.UnMap();

  CommandBuffer buffer;
  buffer.SetOwner(mOwner);
  buffer.Allocate(rhi->GraphicsCmdPool(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);

  VkCommandBufferBeginInfo beginInfo = { };
  beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
  beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;

  buffer.Begin(beginInfo);
    // Barrier 1: UNDEFINED -> TRANSFER_DST_OPTIMAL so the copy can write
    // mip 0 / layer 0 of the color aspect.
    VkImageMemoryBarrier imgBarrier = { };
    imgBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
    imgBarrier.image = mImage;
    imgBarrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
    imgBarrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
    imgBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    imgBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    imgBarrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
    imgBarrier.srcAccessMask = 0;
    imgBarrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
    imgBarrier.subresourceRange.baseArrayLayer = 0;
    imgBarrier.subresourceRange.baseMipLevel = 0;
    imgBarrier.subresourceRange.layerCount = 1;
    imgBarrier.subresourceRange.levelCount = 1;

    buffer.PipelineBarrier(
      VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
      0,
      0, nullptr,
      0, nullptr,
      1, &imgBarrier
    );

    // Tightly packed source (row length / image height of 0) covering the
    // whole image at offset (0, 0, 0).
    VkBufferImageCopy region = { };
    region.bufferOffset = 0;
    region.bufferImageHeight = 0;
    region.bufferRowLength = 0;
    region.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
    region.imageSubresource.baseArrayLayer = 0;
    region.imageSubresource.layerCount = 1;
    region.imageSubresource.mipLevel = 0;
    region.imageExtent.width = image.Width();
    region.imageExtent.height = image.Height();
    region.imageExtent.depth = 1;
    region.imageOffset = { 0, 0, 0 };

    // Record the staging-buffer -> image copy.
    buffer.CopyBufferToImage(stagingBuffer.Handle(), mImage, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &region);

    // Barrier 2: TRANSFER_DST_OPTIMAL -> SHADER_READ_ONLY_OPTIMAL so fragment
    // shaders can read the uploaded data.
    imgBarrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
    imgBarrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
    imgBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
    imgBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;

    buffer.PipelineBarrier(
      VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
      0,
      0, nullptr,
      0, nullptr,
      1, &imgBarrier
    );

  buffer.End();

  // Submit to the graphics queue and wait for it to finish before the
  // temporary command buffer and staging buffer are released.
  VkCommandBuffer commandbuffers[] = { buffer.Handle() };

  VkSubmitInfo submit = { };
  submit.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
  submit.commandBufferCount = 1;
  submit.pCommandBuffers = commandbuffers;

  rhi->GraphicsSubmit(submit);
  rhi->GraphicsWaitIdle();

  buffer.Free();
  stagingBuffer.CleanUp();
}

如果我贴出的代码和信息太多,其中还有不少糟糕的设计决策,我深表歉意。我的问题是:这是驱动程序的错误,还是我的代码中有不正确的地方,导致最终输出无法以正确的纹理映射显示?另外,我对这个问题的了解也不深,如果我的描述让人难以理解,我再次表示歉意。

使用 RenderDoc,我还获得了有关 nvidia 和 intel gpus 渲染管道中每个渲染通道的信息:

Intel 6th Gen
Nvidia GTX870M

你可以在我的 github 中看到引擎的源代码,以获得渲染器的更大图片,它位于引擎目录中,它的实现在 this link

如果您正在寻找编译程序的简单 "works right out of the box" 版本,您可以在此处下载此 link: Recluse Zip

从那里解压并在Build64目录中找到Recluse.sln文件,然后编译并设置"Test"作为启动项目。或者您可以在 Release/Debug 输出目录中找到 exe。

好吧,我是个白痴。问题不在于渲染器的实现,而在于着色器。查看 HDRGamma.frag 着色器(HDR Pass 使用的着色器),在 Shader/Source 目录中:

#version 430
#extension GL_ARB_separate_shader_objects : enable
#extension GL_ARB_shading_language_420pack : enable

// Interface declarations of the HDR/gamma fragment shader. Only the
// declarations are shown here; main() is not part of this snippet.

// Single color output at location 0.
layout (location = 0) out vec4 fragColor;

// Inputs received by the fragment stage.
in FRAG_IN {
  vec2 position;
  vec2 uv;
} frag_in;


// Descriptor set 0: two combined image samplers followed by the HDR uniform block.
layout (set = 0, binding = 0) uniform sampler2D sceneSurface;
layout (set = 0, binding = 1) uniform sampler2D bloomSurface;

// NOTE(review): this block presumably uses the std140 layout (no layout
// qualifier is given here). Under std140 every array element is padded to a
// 16-byte stride, so `float pad[2]` would occupy 32 bytes instead of 8 and
// `bloomEnabled` would not sit at byte offset 16. Confirm against the
// host-side struct that fills this buffer.
layout (set = 0, binding = 2) uniform HDR {
  float gamma;
  float exposure;
  float pad[2];
  int   bloomEnabled;
  int   pad1[3];
} hdr;

可以看到,uniform 块 HDR 被填充成 4 个 float 加 4 个 int。我原以为这样就对齐了缓冲区,但事实并非如此——传入的 bloomEnabled 在着色器中无法被正确读取。我仍然不确定为什么在 Intel GPU 上没有问题,但对 Nvidia 而言这种写法是不正确的。如果我们改成这样来打包缓冲区:

// Descriptor set 0: two combined image samplers followed by the HDR uniform block.
layout (set = 0, binding = 0) uniform sampler2D sceneSurface;
layout (set = 0, binding = 1) uniform sampler2D bloomSurface;

// Repacked block with scalar members only: two floats, then two ints, with no
// arrays. Four 4-byte scalars add up to 16 bytes.
// NOTE(review): the host-side struct must use the same member order — confirm.
layout (set = 0, binding = 2) uniform HDR {
  float gamma;
  float exposure;
  int   bloomEnabled;
  int   pad1;
} hdr;

这样实现就正确了:GPU 仍然读取 16 个字节,而 bloomEnabled 可以被着色器正常读取。事实证明,如果缓冲区没有正确对齐,Nvidia 上会出现一些奇怪的行为,但我仍然不确定原因;另一方面,Intel 似乎有办法保证输出颜色附件的完整性。

不管怎样,现在一切似乎都正常了。源代码已更新,可以在其中查看可正常工作的实现。