GPU 驱动程序中的 Vulkan 纹理映射不准确?
Vulkan Texture Mapping inaccuracies in GPU driver?
过去一周我一直被这个问题困扰,始终弄不清原因。我正在用 Vulkan 开发一款游戏,但在搭建好渲染器之后,渲染纹理的纹理映射出现了奇怪的偏差:画面被缩小到屏幕的四分之一,这让我无法理解。不仅如此,把 Nvidia 驱动程序更新到 388.0 之后,问题依然存在,纹理的表现完全一样:
This is the result of the final image display on an Nvidia GTX 870M with driver v388.0
另请注意,您之前看到的屏幕纹理已 "scaled" 到屏幕的右下四分之一(好像它已被调整为 width/2 x height/2) , 这是不正确的...
渲染器的实现遵循一个前向渲染通道、一个 hdr 通道,然后是最终输出到交换链图像通道。 Forward pass 和 HDR pass 使用它们自己的命令缓冲区提交给图形队列,它们由信号量发出信号。
// Per-frame submission. Three graphics-queue submissions are chained with
// semaphores: offscreen PBR forward pass -> HDR/gamma pass -> swapchain pass.
VkCommandBuffer offscreenCmd = mOffscreen.cmdBuffer->Handle();
VkSemaphore waitSemas[] = { mRhi->SwapchainObject()->ImageAvailableSemaphore() };
VkSemaphore signalSemas[] = { mOffscreen.semaphore->Handle() };
VkPipelineStageFlags waitFlags[] = { VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT };

// Offscreen pass: waits on the swapchain's image-available semaphore and
// signals the offscreen semaphore when it finishes.
VkSubmitInfo offscreenSI = {};
offscreenSI.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
offscreenSI.pCommandBuffers = &offscreenCmd;
offscreenSI.commandBufferCount = 1;
offscreenSI.signalSemaphoreCount = 1;
offscreenSI.pSignalSemaphores = signalSemas;
offscreenSI.waitSemaphoreCount = 1;
offscreenSI.pWaitSemaphores = waitSemas;
offscreenSI.pWaitDstStageMask = waitFlags;

// HDR pass: starts as a copy of the offscreen submit info, then swaps in its
// own command buffer, wait semaphore, signal semaphore and wait stage.
VkSubmitInfo hdrSI = offscreenSI;
VkSemaphore hdrWaits[] = { mOffscreen.semaphore->Handle() };
VkSemaphore hdrSignal[] = { mHDR.semaphore->Handle() };
VkCommandBuffer hdrCmd = mHDR.cmdBuffer->Handle();
// A semaphore wait only blocks the stages named in pWaitDstStageMask (and the
// stages after them). The HDR pass presumably samples the offscreen color
// target in its fragment shader, so it must wait at the fragment-shader stage;
// waiting only at color-attachment output would let the shader read the target
// before the offscreen pass has finished writing it. Waiting earlier is always
// safe, even if that assumption turns out to be wrong.
VkPipelineStageFlags hdrWaitFlags[] = { VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT };
hdrSI.pCommandBuffers = &hdrCmd;
hdrSI.pSignalSemaphores = hdrSignal;
hdrSI.pWaitSemaphores = hdrWaits;
hdrSI.pWaitDstStageMask = hdrWaitFlags;

// Update materials before rendering the frame.
UpdateMaterials();

// Begin frame. This is where we start our render process per frame.
BeginFrame();

// Spin until the offscreen command buffer is no longer being recorded and the
// RHI reports its command buffers complete.
// NOTE(review): this is a CPU busy-wait; consider a fence or condition variable.
while (mOffscreen.cmdBuffer->Recording() || !mRhi->CmdBuffersComplete()) {}

// Offscreen PBR Forward Rendering Pass.
mRhi->GraphicsSubmit(offscreenSI);

// High Dynamic Range and Gamma Pass.
mRhi->GraphicsSubmit(hdrSI);

// The offscreen and HDR work is submitted first; the swapchain command buffer
// then waits on the HDR semaphore before it runs.
VkSemaphore waitSemaphores[] = { mHDR.semaphore->Handle() };
mRhi->SubmitCurrSwapchainCmdBuffer(1, waitSemaphores);

// Render the Overlay.
RenderOverlay();
EndFrame();
更有趣的是,当我在支持 Vulkan、配备第 6 代 GPU 的 Intel Kaby Lake CPU 上运行相同的代码时,输出图像完全正确,符合预期!
所以我不确定这是否是驱动程序错误:看看我是如何实现渲染纹理的:
void Renderer::SetUpRenderTextures()
{
Texture* pbrColor = mRhi->CreateTexture();
Texture* pbrDepth = mRhi->CreateTexture();
Sampler* pbrSampler = mRhi->CreateSampler();
Texture* hdrTexture = mRhi->CreateTexture();
Sampler* hdrSampler = mRhi->CreateSampler();
gResources().RegisterSampler("HDRGammaSampler", hdrSampler);
gResources().RegisterRenderTexture("HDRGammaTexture", hdrTexture);
gResources().RegisterRenderTexture("PBRColor", pbrColor);
gResources().RegisterRenderTexture("PBRDepth", pbrDepth);
gResources().RegisterSampler("PBRSampler", pbrSampler);
VkImageCreateInfo cImageInfo = { };
VkImageViewCreateInfo cViewInfo = { };
cImageInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
cImageInfo.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
cImageInfo.imageType = VK_IMAGE_TYPE_2D;
cImageInfo.format = VK_FORMAT_R16G16B16A16_SFLOAT;
cImageInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
cImageInfo.mipLevels = 1;
cImageInfo.extent.depth = 1;
cImageInfo.arrayLayers = 1;
cImageInfo.extent.width = mWindowHandle->Width();
cImageInfo.extent.height = mWindowHandle->Height();
cImageInfo.samples = VK_SAMPLE_COUNT_1_BIT;
cImageInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
cImageInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
cViewInfo.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
cViewInfo.format = VK_FORMAT_R16G16B16A16_SFLOAT;
cViewInfo.image = nullptr; // No need to set the image, texture->Initialize() handles this for us.
cViewInfo.viewType = VK_IMAGE_VIEW_TYPE_2D;
cViewInfo.subresourceRange = { };
cViewInfo.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
cViewInfo.subresourceRange.baseArrayLayer = 0;
cViewInfo.subresourceRange.baseMipLevel = 0;
cViewInfo.subresourceRange.layerCount = 1;
cViewInfo.subresourceRange.levelCount = 1;
pbrColor->Initialize(cImageInfo, cViewInfo);
// Using the same info, only we are chaning the format to rgba8 unorm attachments
cImageInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
cViewInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
hdrTexture->Initialize(cImageInfo, cViewInfo);
cImageInfo.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
cImageInfo.format = VK_FORMAT_D24_UNORM_S8_UINT;
cViewInfo.format = VK_FORMAT_D24_UNORM_S8_UINT;
cViewInfo.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
pbrDepth->Initialize(cImageInfo, cViewInfo);
VkSamplerCreateInfo samplerCI = { };
samplerCI.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO;
samplerCI.magFilter = VK_FILTER_LINEAR;
samplerCI.minFilter = VK_FILTER_LINEAR;
samplerCI.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR;
samplerCI.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
samplerCI.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
samplerCI.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
samplerCI.compareEnable = VK_FALSE;
samplerCI.mipLodBias = 0.0f;
samplerCI.maxAnisotropy = 16.0f;
samplerCI.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE;
samplerCI.maxLod = 1.0f;
samplerCI.minLod = 0.0f;
samplerCI.unnormalizedCoordinates = VK_FALSE;
pbrSampler->Initialize(samplerCI);
hdrSampler->Initialize(samplerCI);
Sampler* defaultSampler = mRhi->CreateSampler();
defaultSampler->Initialize(samplerCI);
gResources().RegisterSampler("DefaultSampler", defaultSampler);
VkImageCreateInfo dImageInfo = {};
VkImageViewCreateInfo dViewInfo = {};
dImageInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
dImageInfo.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
dImageInfo.imageType = VK_IMAGE_TYPE_2D;
dImageInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
dImageInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
dImageInfo.mipLevels = 1;
dImageInfo.extent.depth = 1;
dImageInfo.arrayLayers = 1;
dImageInfo.extent.width = mWindowHandle->Width();
dImageInfo.extent.height = mWindowHandle->Height();
dImageInfo.samples = VK_SAMPLE_COUNT_1_BIT;
dImageInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
dImageInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
dViewInfo.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
dViewInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
dViewInfo.image = nullptr; // No need to set the image, texture handles this for us.
dViewInfo.viewType = VK_IMAGE_VIEW_TYPE_2D;
dViewInfo.subresourceRange = {};
dViewInfo.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
dViewInfo.subresourceRange.baseArrayLayer = 0;
dViewInfo.subresourceRange.baseMipLevel = 0;
dViewInfo.subresourceRange.layerCount = 1;
dViewInfo.subresourceRange.levelCount = 1;
Texture* defaultTexture = mRhi->CreateTexture();
defaultTexture->Initialize(dImageInfo, dViewInfo);
gResources().RegisterRenderTexture("DefaultTexture", defaultTexture);
}
和帧缓冲区:
void Renderer::SetUpFrameBuffers()
{
Texture* pbrColor = gResources().GetRenderTexture("PBRColor");
Texture* pbrDepth = gResources().GetRenderTexture("PBRDepth");
FrameBuffer* pbrFrameBuffer = mRhi->CreateFrameBuffer();
gResources().RegisterFrameBuffer("PBRFrameBuffer", pbrFrameBuffer);
FrameBuffer* hdrFrameBuffer = mRhi->CreateFrameBuffer();
gResources().RegisterFrameBuffer("HDRGammaFrameBuffer", hdrFrameBuffer);
VkAttachmentDescription attachmentDescriptions[2];
attachmentDescriptions[0].format = VK_FORMAT_R16G16B16A16_SFLOAT;
attachmentDescriptions[0].initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
attachmentDescriptions[0].finalLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
attachmentDescriptions[0].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
attachmentDescriptions[0].storeOp = VK_ATTACHMENT_STORE_OP_STORE;
attachmentDescriptions[0].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
attachmentDescriptions[0].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
attachmentDescriptions[0].samples = VK_SAMPLE_COUNT_1_BIT;
attachmentDescriptions[0].flags = 0;
attachmentDescriptions[1].format = VK_FORMAT_D24_UNORM_S8_UINT;
attachmentDescriptions[1].initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
attachmentDescriptions[1].finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
attachmentDescriptions[1].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
attachmentDescriptions[1].storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
attachmentDescriptions[1].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
attachmentDescriptions[1].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
attachmentDescriptions[1].samples = VK_SAMPLE_COUNT_1_BIT;
attachmentDescriptions[1].flags = 0;
VkSubpassDependency dependencies[2];
dependencies[0].srcSubpass = VK_SUBPASS_EXTERNAL;
dependencies[0].dstSubpass = 0;
dependencies[0].srcStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
dependencies[0].dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
dependencies[0].srcAccessMask = VK_ACCESS_MEMORY_READ_BIT;
dependencies[0].dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
dependencies[0].dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT;
dependencies[1].srcSubpass = 0;
dependencies[1].dstSubpass = VK_SUBPASS_EXTERNAL;
dependencies[1].srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
dependencies[1].dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
dependencies[1].srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
dependencies[1].dstAccessMask = VK_ACCESS_MEMORY_READ_BIT;
dependencies[1].dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT;
VkAttachmentReference attachmentColorRef = { 0, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL };
VkAttachmentReference attachmentDepthRef = { 1, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL };
VkSubpassDescription subpass = { };
subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
subpass.colorAttachmentCount = 1;
subpass.pColorAttachments = &attachmentColorRef;
subpass.pDepthStencilAttachment = &attachmentDepthRef;
VkRenderPassCreateInfo renderpassCI = { };
renderpassCI.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO;
renderpassCI.attachmentCount = 2;
renderpassCI.pAttachments = attachmentDescriptions;
renderpassCI.subpassCount = 1;
renderpassCI.pSubpasses = &subpass;
renderpassCI.dependencyCount = 2;
renderpassCI.pDependencies = dependencies;
VkImageView attachments[2];
attachments[0] = pbrColor->View();
attachments[1] = pbrDepth->View();
VkFramebufferCreateInfo framebufferCI = {};
framebufferCI.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO;
framebufferCI.height = mWindowHandle->Height();
framebufferCI.width = mWindowHandle->Width();
framebufferCI.renderPass = nullptr; // The finalize call handles this for us.
framebufferCI.layers = 1;
framebufferCI.attachmentCount = 2;
framebufferCI.pAttachments = attachments;
pbrFrameBuffer->Finalize(framebufferCI, renderpassCI);
// No need to render any depth, as we are only writing on a 2d surface.
subpass.pDepthStencilAttachment = nullptr;
attachments[0] = gResources().GetRenderTexture("HDRGammaTexture")->View();
attachments[1] = nullptr;
framebufferCI.attachmentCount = 1;
attachmentDescriptions[0].format = VK_FORMAT_R8G8B8A8_UNORM;
renderpassCI.attachmentCount = 1;
hdrFrameBuffer->Finalize(framebufferCI, renderpassCI);
}
最后,看看纹理是如何初始化的:
// Creates the Vulkan sampler described by `info` and stores the handle in
// mSampler. A failure is reported through R_DEBUG only; nothing is returned.
void Sampler::Initialize(VkSamplerCreateInfo& info)
{
  const VkResult status = vkCreateSampler(mOwner, &info, nullptr, &mSampler);
  if (status == VK_SUCCESS) {
    return;
  }

  R_DEBUG("ERROR: Sampler failed to initialize!\n");
}
// Destroys the sampler if one exists and resets the handle, so calling this
// again is a no-op.
void Sampler::CleanUp()
{
  if (!mSampler) {
    return;
  }

  vkDestroySampler(mOwner, mSampler, nullptr);
  mSampler = VK_NULL_HANDLE;
}
void Texture::Initialize(const VkImageCreateInfo& imageInfo,
VkImageViewCreateInfo& viewInfo, b8 stream) // Ignore "stream" as it doesnt do anything yet...
{
if (vkCreateImage(mOwner, &imageInfo, nullptr, &mImage) != VK_SUCCESS) {
R_DEBUG("ERROR: Failed to create image!\n");
return;
}
VkMemoryRequirements memoryRequirements;
vkGetImageMemoryRequirements(mOwner, mImage, &memoryRequirements);
VkMemoryAllocateInfo allocInfo = { };
allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
allocInfo.allocationSize = memoryRequirements.size;
allocInfo.memoryTypeIndex = VulkanRHI::gPhysicalDevice.FindMemoryType(memoryRequirements.memoryTypeBits,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
if (vkAllocateMemory(mOwner, &allocInfo, nullptr, &mMemory) != VK_SUCCESS) {
R_DEBUG("ERROR: Failed to allocate host memory for image!\n");
return;
}
if (vkBindImageMemory(mOwner, mImage, mMemory, 0) != VK_SUCCESS) {
R_DEBUG("ERROR: Failed to bind memory to image!\n");
return;
}
viewInfo.image = mImage;
if (vkCreateImageView(mOwner, &viewInfo, nullptr, &mView) != VK_SUCCESS) {
R_DEBUG("ERROR: Failed to create image view!\n");
}
}
// Releases the Vulkan objects owned by this texture and resets each handle, so
// calling this again is a no-op. Objects are destroyed in the reverse of the
// order Initialize() creates them (view, then image, then memory), so the view
// is never left referring to an image that has already been destroyed.
void Texture::CleanUp()
{
  if (mView) {
    vkDestroyImageView(mOwner, mView, nullptr);
    mView = VK_NULL_HANDLE;
  }

  if (mImage) {
    vkDestroyImage(mOwner, mImage, nullptr);
    mImage = VK_NULL_HANDLE;
  }

  if (mMemory) {
    vkFreeMemory(mOwner, mMemory, nullptr);
    mMemory = VK_NULL_HANDLE;
  }
}
// Copies the pixel data of `image` into this texture's VkImage through a
// host-visible staging buffer, then transitions the image to
// SHADER_READ_ONLY_OPTIMAL.
//
// rhi:   supplies the graphics command pool and the graphics queue submission.
// image: source pixels; the size is computed as width * height * 4 bytes
//        (assumes 4 bytes per texel, e.g. RGBA8 -- TODO confirm).
//
// The call is synchronous: it waits on the graphics queue before releasing the
// temporary command buffer and staging buffer.
void Texture::Upload(VulkanRHI* rhi, Recluse::Image const& image)
{
  // Widen before multiplying so the product is computed in VkDeviceSize rather
  // than in the (possibly narrower) type returned by Width()/Height().
  VkDeviceSize imageSize = static_cast<VkDeviceSize>(image.Width()) * image.Height() * 4;

  Buffer stagingBuffer;
  stagingBuffer.SetOwner(mOwner);

  VkBufferCreateInfo stagingCI = { };
  stagingCI.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
  stagingCI.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
  stagingCI.size = imageSize;
  stagingCI.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
  stagingBuffer.Initialize(stagingCI, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);

  // Do not write through the mapping unless the map actually succeeded.
  VkResult result = stagingBuffer.Map();
  if (result != VK_SUCCESS) {
    R_DEBUG("ERROR: Failed to map staging buffer for texture upload!\n");
    stagingBuffer.CleanUp();
    return;
  }
  memcpy(stagingBuffer.Mapped(), image.Data(), imageSize);
  stagingBuffer.UnMap();

  // One-shot command buffer that records the copy and both layout transitions.
  CommandBuffer buffer;
  buffer.SetOwner(mOwner);
  buffer.Allocate(rhi->GraphicsCmdPool(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);

  VkCommandBufferBeginInfo beginInfo = { };
  beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
  beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;

  buffer.Begin(beginInfo);

  // Transition 1: UNDEFINED -> TRANSFER_DST_OPTIMAL so the copy can write.
  VkImageMemoryBarrier imgBarrier = { };
  imgBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
  imgBarrier.image = mImage;
  imgBarrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
  imgBarrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
  imgBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
  imgBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
  imgBarrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
  imgBarrier.srcAccessMask = 0;
  imgBarrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
  imgBarrier.subresourceRange.baseArrayLayer = 0;
  imgBarrier.subresourceRange.baseMipLevel = 0;
  imgBarrier.subresourceRange.layerCount = 1;
  imgBarrier.subresourceRange.levelCount = 1;

  buffer.PipelineBarrier(
    VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
    0,
    0, nullptr,
    0, nullptr,
    1, &imgBarrier
  );

  // Copy the whole staging buffer into mip 0 / layer 0. Row length and image
  // height of 0 mean the buffer data is tightly packed.
  VkBufferImageCopy region = { };
  region.bufferOffset = 0;
  region.bufferImageHeight = 0;
  region.bufferRowLength = 0;
  region.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
  region.imageSubresource.baseArrayLayer = 0;
  region.imageSubresource.layerCount = 1;
  region.imageSubresource.mipLevel = 0;
  region.imageExtent.width = image.Width();
  region.imageExtent.height = image.Height();
  region.imageExtent.depth = 1;
  region.imageOffset = { 0, 0, 0 };

  // Send buffer image copy cmd.
  buffer.CopyBufferToImage(stagingBuffer.Handle(), mImage, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &region);

  // Transition 2: TRANSFER_DST_OPTIMAL -> SHADER_READ_ONLY_OPTIMAL so fragment
  // shaders can sample the uploaded data.
  imgBarrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
  imgBarrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
  imgBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
  imgBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;

  buffer.PipelineBarrier(
    VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
    0,
    0, nullptr,
    0, nullptr,
    1, &imgBarrier
  );

  buffer.End();

  // Submit to the graphics queue and wait, so the temporaries below are no
  // longer in use when they are released.
  VkCommandBuffer commandbuffers[] = { buffer.Handle() };

  VkSubmitInfo submit = { };
  submit.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
  submit.commandBufferCount = 1;
  submit.pCommandBuffers = commandbuffers;

  rhi->GraphicsSubmit(submit);
  rhi->GraphicsWaitIdle();

  buffer.Free();
  stagingBuffer.CleanUp();
}
很抱歉贴了这么多代码和信息,其中还有不少糟糕的设计决策。我的问题是:这是驱动程序的错误,还是我的代码中有不正确的地方,导致最终输出无法以正确的纹理映射显示?另外,我对这个问题的理解也不深,如果我的描述有说不通的地方,再次表示歉意。
使用 RenderDoc,我还获得了有关 nvidia 和 intel gpus 渲染管道中每个渲染通道的信息:
你可以在我的 github 中看到引擎的源代码,以获得渲染器的更大图片,它位于引擎目录中,它的实现在
this link
如果您正在寻找编译程序的简单 "works right out of the box" 版本,您可以在此处下载此 link:
Recluse Zip
从那里解压并在Build64目录中找到Recluse.sln文件,然后编译并设置"Test"作为启动项目。或者您可以在 Release/Debug 输出目录中找到 exe。
好吧,我是个白痴。问题不在于渲染器的实现,而在于着色器。查看 HDRGamma.frag 着色器(HDR Pass 使用的着色器),在 Shader/Source 目录中:
// HDRGamma.frag interface as originally written. This is the layout that
// misbehaved on Nvidia; it is kept unchanged here for reference.
#version 430
#extension GL_ARB_separate_shader_objects : enable
#extension GL_ARB_shading_language_420pack : enable
// Final color written by this pass.
layout (location = 0) out vec4 fragColor;
// Per-fragment inputs from the previous shader stage.
in FRAG_IN {
vec2 position;
vec2 uv;
} frag_in;
layout (set = 0, binding = 0) uniform sampler2D sceneSurface;
layout (set = 0, binding = 1) uniform sampler2D bloomSurface;
// NOTE(review): under std140 rules (the default for uniform blocks when
// targeting Vulkan) every array element is rounded up to 16 bytes. The arrays
// below therefore do NOT pad the block to "4 floats + 4 ints"; the resulting
// offsets are shown per member. A CPU-side struct packed as 4 floats + 4 ints
// would place bloomEnabled at byte 16, not 48 -- confirm against the host code.
layout (set = 0, binding = 2) uniform HDR {
float gamma;        // offset 0
float exposure;     // offset 4
float pad[2];       // offset 16, stride 16 -> occupies bytes 16..47
int bloomEnabled;   // offset 48
int pad1[3];        // offset 64, stride 16 -> occupies bytes 64..111
} hdr;
可以看到,uniform 块 HDR 被填充成 4 个 float 加 4 个 int。我原以为这样就对齐了缓冲区,但事实并非如此——着色器无法正确读取传入的 bloomEnabled 就是证明。我仍然不确定为什么在 Intel GPU 上没有问题,但对 Nvidia 来说这种布局是不正确的。如果改成这样打包缓冲区:
// Corrected HDR uniform block: four scalar members and no arrays, so the
// members are tightly packed and the block is 16 bytes in total.
layout (set = 0, binding = 0) uniform sampler2D sceneSurface;
layout (set = 0, binding = 1) uniform sampler2D bloomSurface;
layout (set = 0, binding = 2) uniform HDR {
float gamma;        // offset 0
float exposure;     // offset 4
int bloomEnabled;   // offset 8
int pad1;           // offset 12, explicit padding up to 16 bytes
} hdr;
这样实现就正确了:GPU 仍然读取 16 个字节,着色器也能正常读取 bloomEnabled。事实证明,如果没有正确对齐缓冲区,Nvidia 会出现一些奇怪的行为,原因我仍不清楚;而 Intel 似乎有办法保证输出颜色附件的完整性。
不管怎样,现在一切似乎都很好。更新了源代码以查看工作实现。
过去一周我一直被这个问题困扰,始终弄不清原因。我正在用 Vulkan 开发一款游戏,但在搭建好渲染器之后,渲染纹理的纹理映射出现了奇怪的偏差:画面被缩小到屏幕的四分之一,这让我无法理解。不仅如此,把 Nvidia 驱动程序更新到 388.0 之后,问题依然存在,纹理的表现完全一样: This is the result of the final image display on an Nvidia GTX 870M with driver v388.0
另请注意,您之前看到的屏幕纹理已 "scaled" 到屏幕的右下四分之一(好像它已被调整为 width/2 x height/2) , 这是不正确的...
渲染器的实现遵循一个前向渲染通道、一个 hdr 通道,然后是最终输出到交换链图像通道。 Forward pass 和 HDR pass 使用它们自己的命令缓冲区提交给图形队列,它们由信号量发出信号。
// Per-frame submission. Three graphics-queue submissions are chained with
// semaphores: offscreen PBR forward pass -> HDR/gamma pass -> swapchain pass.
VkCommandBuffer offscreenCmd = mOffscreen.cmdBuffer->Handle();
VkSemaphore waitSemas[] = { mRhi->SwapchainObject()->ImageAvailableSemaphore() };
VkSemaphore signalSemas[] = { mOffscreen.semaphore->Handle() };
VkPipelineStageFlags waitFlags[] = { VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT };

// Offscreen pass: waits on the swapchain's image-available semaphore and
// signals the offscreen semaphore when it finishes.
VkSubmitInfo offscreenSI = {};
offscreenSI.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
offscreenSI.pCommandBuffers = &offscreenCmd;
offscreenSI.commandBufferCount = 1;
offscreenSI.signalSemaphoreCount = 1;
offscreenSI.pSignalSemaphores = signalSemas;
offscreenSI.waitSemaphoreCount = 1;
offscreenSI.pWaitSemaphores = waitSemas;
offscreenSI.pWaitDstStageMask = waitFlags;

// HDR pass: starts as a copy of the offscreen submit info, then swaps in its
// own command buffer, wait semaphore, signal semaphore and wait stage.
VkSubmitInfo hdrSI = offscreenSI;
VkSemaphore hdrWaits[] = { mOffscreen.semaphore->Handle() };
VkSemaphore hdrSignal[] = { mHDR.semaphore->Handle() };
VkCommandBuffer hdrCmd = mHDR.cmdBuffer->Handle();
// A semaphore wait only blocks the stages named in pWaitDstStageMask (and the
// stages after them). The HDR pass presumably samples the offscreen color
// target in its fragment shader, so it must wait at the fragment-shader stage;
// waiting only at color-attachment output would let the shader read the target
// before the offscreen pass has finished writing it. Waiting earlier is always
// safe, even if that assumption turns out to be wrong.
VkPipelineStageFlags hdrWaitFlags[] = { VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT };
hdrSI.pCommandBuffers = &hdrCmd;
hdrSI.pSignalSemaphores = hdrSignal;
hdrSI.pWaitSemaphores = hdrWaits;
hdrSI.pWaitDstStageMask = hdrWaitFlags;

// Update materials before rendering the frame.
UpdateMaterials();

// Begin frame. This is where we start our render process per frame.
BeginFrame();

// Spin until the offscreen command buffer is no longer being recorded and the
// RHI reports its command buffers complete.
// NOTE(review): this is a CPU busy-wait; consider a fence or condition variable.
while (mOffscreen.cmdBuffer->Recording() || !mRhi->CmdBuffersComplete()) {}

// Offscreen PBR Forward Rendering Pass.
mRhi->GraphicsSubmit(offscreenSI);

// High Dynamic Range and Gamma Pass.
mRhi->GraphicsSubmit(hdrSI);

// The offscreen and HDR work is submitted first; the swapchain command buffer
// then waits on the HDR semaphore before it runs.
VkSemaphore waitSemaphores[] = { mHDR.semaphore->Handle() };
mRhi->SubmitCurrSwapchainCmdBuffer(1, waitSemaphores);

// Render the Overlay.
RenderOverlay();
EndFrame();
更有趣的是,当我在支持 Vulkan、配备第 6 代 GPU 的 Intel Kaby Lake CPU 上运行相同的代码时,输出图像完全正确,符合预期!
所以我不确定这是否是驱动程序错误:看看我是如何实现渲染纹理的:
void Renderer::SetUpRenderTextures()
{
Texture* pbrColor = mRhi->CreateTexture();
Texture* pbrDepth = mRhi->CreateTexture();
Sampler* pbrSampler = mRhi->CreateSampler();
Texture* hdrTexture = mRhi->CreateTexture();
Sampler* hdrSampler = mRhi->CreateSampler();
gResources().RegisterSampler("HDRGammaSampler", hdrSampler);
gResources().RegisterRenderTexture("HDRGammaTexture", hdrTexture);
gResources().RegisterRenderTexture("PBRColor", pbrColor);
gResources().RegisterRenderTexture("PBRDepth", pbrDepth);
gResources().RegisterSampler("PBRSampler", pbrSampler);
VkImageCreateInfo cImageInfo = { };
VkImageViewCreateInfo cViewInfo = { };
cImageInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
cImageInfo.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
cImageInfo.imageType = VK_IMAGE_TYPE_2D;
cImageInfo.format = VK_FORMAT_R16G16B16A16_SFLOAT;
cImageInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
cImageInfo.mipLevels = 1;
cImageInfo.extent.depth = 1;
cImageInfo.arrayLayers = 1;
cImageInfo.extent.width = mWindowHandle->Width();
cImageInfo.extent.height = mWindowHandle->Height();
cImageInfo.samples = VK_SAMPLE_COUNT_1_BIT;
cImageInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
cImageInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
cViewInfo.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
cViewInfo.format = VK_FORMAT_R16G16B16A16_SFLOAT;
cViewInfo.image = nullptr; // No need to set the image, texture->Initialize() handles this for us.
cViewInfo.viewType = VK_IMAGE_VIEW_TYPE_2D;
cViewInfo.subresourceRange = { };
cViewInfo.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
cViewInfo.subresourceRange.baseArrayLayer = 0;
cViewInfo.subresourceRange.baseMipLevel = 0;
cViewInfo.subresourceRange.layerCount = 1;
cViewInfo.subresourceRange.levelCount = 1;
pbrColor->Initialize(cImageInfo, cViewInfo);
// Using the same info, only we are chaning the format to rgba8 unorm attachments
cImageInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
cViewInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
hdrTexture->Initialize(cImageInfo, cViewInfo);
cImageInfo.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
cImageInfo.format = VK_FORMAT_D24_UNORM_S8_UINT;
cViewInfo.format = VK_FORMAT_D24_UNORM_S8_UINT;
cViewInfo.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
pbrDepth->Initialize(cImageInfo, cViewInfo);
VkSamplerCreateInfo samplerCI = { };
samplerCI.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO;
samplerCI.magFilter = VK_FILTER_LINEAR;
samplerCI.minFilter = VK_FILTER_LINEAR;
samplerCI.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR;
samplerCI.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
samplerCI.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
samplerCI.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
samplerCI.compareEnable = VK_FALSE;
samplerCI.mipLodBias = 0.0f;
samplerCI.maxAnisotropy = 16.0f;
samplerCI.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE;
samplerCI.maxLod = 1.0f;
samplerCI.minLod = 0.0f;
samplerCI.unnormalizedCoordinates = VK_FALSE;
pbrSampler->Initialize(samplerCI);
hdrSampler->Initialize(samplerCI);
Sampler* defaultSampler = mRhi->CreateSampler();
defaultSampler->Initialize(samplerCI);
gResources().RegisterSampler("DefaultSampler", defaultSampler);
VkImageCreateInfo dImageInfo = {};
VkImageViewCreateInfo dViewInfo = {};
dImageInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
dImageInfo.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
dImageInfo.imageType = VK_IMAGE_TYPE_2D;
dImageInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
dImageInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
dImageInfo.mipLevels = 1;
dImageInfo.extent.depth = 1;
dImageInfo.arrayLayers = 1;
dImageInfo.extent.width = mWindowHandle->Width();
dImageInfo.extent.height = mWindowHandle->Height();
dImageInfo.samples = VK_SAMPLE_COUNT_1_BIT;
dImageInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
dImageInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
dViewInfo.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
dViewInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
dViewInfo.image = nullptr; // No need to set the image, texture handles this for us.
dViewInfo.viewType = VK_IMAGE_VIEW_TYPE_2D;
dViewInfo.subresourceRange = {};
dViewInfo.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
dViewInfo.subresourceRange.baseArrayLayer = 0;
dViewInfo.subresourceRange.baseMipLevel = 0;
dViewInfo.subresourceRange.layerCount = 1;
dViewInfo.subresourceRange.levelCount = 1;
Texture* defaultTexture = mRhi->CreateTexture();
defaultTexture->Initialize(dImageInfo, dViewInfo);
gResources().RegisterRenderTexture("DefaultTexture", defaultTexture);
}
和帧缓冲区:
void Renderer::SetUpFrameBuffers()
{
Texture* pbrColor = gResources().GetRenderTexture("PBRColor");
Texture* pbrDepth = gResources().GetRenderTexture("PBRDepth");
FrameBuffer* pbrFrameBuffer = mRhi->CreateFrameBuffer();
gResources().RegisterFrameBuffer("PBRFrameBuffer", pbrFrameBuffer);
FrameBuffer* hdrFrameBuffer = mRhi->CreateFrameBuffer();
gResources().RegisterFrameBuffer("HDRGammaFrameBuffer", hdrFrameBuffer);
VkAttachmentDescription attachmentDescriptions[2];
attachmentDescriptions[0].format = VK_FORMAT_R16G16B16A16_SFLOAT;
attachmentDescriptions[0].initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
attachmentDescriptions[0].finalLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
attachmentDescriptions[0].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
attachmentDescriptions[0].storeOp = VK_ATTACHMENT_STORE_OP_STORE;
attachmentDescriptions[0].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
attachmentDescriptions[0].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
attachmentDescriptions[0].samples = VK_SAMPLE_COUNT_1_BIT;
attachmentDescriptions[0].flags = 0;
attachmentDescriptions[1].format = VK_FORMAT_D24_UNORM_S8_UINT;
attachmentDescriptions[1].initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
attachmentDescriptions[1].finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
attachmentDescriptions[1].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
attachmentDescriptions[1].storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
attachmentDescriptions[1].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
attachmentDescriptions[1].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
attachmentDescriptions[1].samples = VK_SAMPLE_COUNT_1_BIT;
attachmentDescriptions[1].flags = 0;
VkSubpassDependency dependencies[2];
dependencies[0].srcSubpass = VK_SUBPASS_EXTERNAL;
dependencies[0].dstSubpass = 0;
dependencies[0].srcStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
dependencies[0].dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
dependencies[0].srcAccessMask = VK_ACCESS_MEMORY_READ_BIT;
dependencies[0].dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
dependencies[0].dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT;
dependencies[1].srcSubpass = 0;
dependencies[1].dstSubpass = VK_SUBPASS_EXTERNAL;
dependencies[1].srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
dependencies[1].dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
dependencies[1].srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
dependencies[1].dstAccessMask = VK_ACCESS_MEMORY_READ_BIT;
dependencies[1].dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT;
VkAttachmentReference attachmentColorRef = { 0, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL };
VkAttachmentReference attachmentDepthRef = { 1, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL };
VkSubpassDescription subpass = { };
subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
subpass.colorAttachmentCount = 1;
subpass.pColorAttachments = &attachmentColorRef;
subpass.pDepthStencilAttachment = &attachmentDepthRef;
VkRenderPassCreateInfo renderpassCI = { };
renderpassCI.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO;
renderpassCI.attachmentCount = 2;
renderpassCI.pAttachments = attachmentDescriptions;
renderpassCI.subpassCount = 1;
renderpassCI.pSubpasses = &subpass;
renderpassCI.dependencyCount = 2;
renderpassCI.pDependencies = dependencies;
VkImageView attachments[2];
attachments[0] = pbrColor->View();
attachments[1] = pbrDepth->View();
VkFramebufferCreateInfo framebufferCI = {};
framebufferCI.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO;
framebufferCI.height = mWindowHandle->Height();
framebufferCI.width = mWindowHandle->Width();
framebufferCI.renderPass = nullptr; // The finalize call handles this for us.
framebufferCI.layers = 1;
framebufferCI.attachmentCount = 2;
framebufferCI.pAttachments = attachments;
pbrFrameBuffer->Finalize(framebufferCI, renderpassCI);
// No need to render any depth, as we are only writing on a 2d surface.
subpass.pDepthStencilAttachment = nullptr;
attachments[0] = gResources().GetRenderTexture("HDRGammaTexture")->View();
attachments[1] = nullptr;
framebufferCI.attachmentCount = 1;
attachmentDescriptions[0].format = VK_FORMAT_R8G8B8A8_UNORM;
renderpassCI.attachmentCount = 1;
hdrFrameBuffer->Finalize(framebufferCI, renderpassCI);
}
最后,看看纹理是如何初始化的:
// Creates the Vulkan sampler described by `info` and stores the handle in
// mSampler. A failure is reported through R_DEBUG only; nothing is returned.
void Sampler::Initialize(VkSamplerCreateInfo& info)
{
  const VkResult status = vkCreateSampler(mOwner, &info, nullptr, &mSampler);
  if (status == VK_SUCCESS) {
    return;
  }

  R_DEBUG("ERROR: Sampler failed to initialize!\n");
}
// Destroys the sampler if one exists and resets the handle, so calling this
// again is a no-op.
void Sampler::CleanUp()
{
  if (!mSampler) {
    return;
  }

  vkDestroySampler(mOwner, mSampler, nullptr);
  mSampler = VK_NULL_HANDLE;
}
void Texture::Initialize(const VkImageCreateInfo& imageInfo,
VkImageViewCreateInfo& viewInfo, b8 stream) // Ignore "stream" as it doesnt do anything yet...
{
if (vkCreateImage(mOwner, &imageInfo, nullptr, &mImage) != VK_SUCCESS) {
R_DEBUG("ERROR: Failed to create image!\n");
return;
}
VkMemoryRequirements memoryRequirements;
vkGetImageMemoryRequirements(mOwner, mImage, &memoryRequirements);
VkMemoryAllocateInfo allocInfo = { };
allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
allocInfo.allocationSize = memoryRequirements.size;
allocInfo.memoryTypeIndex = VulkanRHI::gPhysicalDevice.FindMemoryType(memoryRequirements.memoryTypeBits,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
if (vkAllocateMemory(mOwner, &allocInfo, nullptr, &mMemory) != VK_SUCCESS) {
R_DEBUG("ERROR: Failed to allocate host memory for image!\n");
return;
}
if (vkBindImageMemory(mOwner, mImage, mMemory, 0) != VK_SUCCESS) {
R_DEBUG("ERROR: Failed to bind memory to image!\n");
return;
}
viewInfo.image = mImage;
if (vkCreateImageView(mOwner, &viewInfo, nullptr, &mView) != VK_SUCCESS) {
R_DEBUG("ERROR: Failed to create image view!\n");
}
}
// Releases the Vulkan objects owned by this texture and resets each handle, so
// calling this again is a no-op. Objects are destroyed in the reverse of the
// order Initialize() creates them (view, then image, then memory), so the view
// is never left referring to an image that has already been destroyed.
void Texture::CleanUp()
{
  if (mView) {
    vkDestroyImageView(mOwner, mView, nullptr);
    mView = VK_NULL_HANDLE;
  }

  if (mImage) {
    vkDestroyImage(mOwner, mImage, nullptr);
    mImage = VK_NULL_HANDLE;
  }

  if (mMemory) {
    vkFreeMemory(mOwner, mMemory, nullptr);
    mMemory = VK_NULL_HANDLE;
  }
}
// Copies the pixel data of `image` into this texture's VkImage through a
// host-visible staging buffer, then transitions the image to
// SHADER_READ_ONLY_OPTIMAL.
//
// rhi:   supplies the graphics command pool and the graphics queue submission.
// image: source pixels; the size is computed as width * height * 4 bytes
//        (assumes 4 bytes per texel, e.g. RGBA8 -- TODO confirm).
//
// The call is synchronous: it waits on the graphics queue before releasing the
// temporary command buffer and staging buffer.
void Texture::Upload(VulkanRHI* rhi, Recluse::Image const& image)
{
  // Widen before multiplying so the product is computed in VkDeviceSize rather
  // than in the (possibly narrower) type returned by Width()/Height().
  VkDeviceSize imageSize = static_cast<VkDeviceSize>(image.Width()) * image.Height() * 4;

  Buffer stagingBuffer;
  stagingBuffer.SetOwner(mOwner);

  VkBufferCreateInfo stagingCI = { };
  stagingCI.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
  stagingCI.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
  stagingCI.size = imageSize;
  stagingCI.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
  stagingBuffer.Initialize(stagingCI, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);

  // Do not write through the mapping unless the map actually succeeded.
  VkResult result = stagingBuffer.Map();
  if (result != VK_SUCCESS) {
    R_DEBUG("ERROR: Failed to map staging buffer for texture upload!\n");
    stagingBuffer.CleanUp();
    return;
  }
  memcpy(stagingBuffer.Mapped(), image.Data(), imageSize);
  stagingBuffer.UnMap();

  // One-shot command buffer that records the copy and both layout transitions.
  CommandBuffer buffer;
  buffer.SetOwner(mOwner);
  buffer.Allocate(rhi->GraphicsCmdPool(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);

  VkCommandBufferBeginInfo beginInfo = { };
  beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
  beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;

  buffer.Begin(beginInfo);

  // Transition 1: UNDEFINED -> TRANSFER_DST_OPTIMAL so the copy can write.
  VkImageMemoryBarrier imgBarrier = { };
  imgBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
  imgBarrier.image = mImage;
  imgBarrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
  imgBarrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
  imgBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
  imgBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
  imgBarrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
  imgBarrier.srcAccessMask = 0;
  imgBarrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
  imgBarrier.subresourceRange.baseArrayLayer = 0;
  imgBarrier.subresourceRange.baseMipLevel = 0;
  imgBarrier.subresourceRange.layerCount = 1;
  imgBarrier.subresourceRange.levelCount = 1;

  buffer.PipelineBarrier(
    VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
    0,
    0, nullptr,
    0, nullptr,
    1, &imgBarrier
  );

  // Copy the whole staging buffer into mip 0 / layer 0. Row length and image
  // height of 0 mean the buffer data is tightly packed.
  VkBufferImageCopy region = { };
  region.bufferOffset = 0;
  region.bufferImageHeight = 0;
  region.bufferRowLength = 0;
  region.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
  region.imageSubresource.baseArrayLayer = 0;
  region.imageSubresource.layerCount = 1;
  region.imageSubresource.mipLevel = 0;
  region.imageExtent.width = image.Width();
  region.imageExtent.height = image.Height();
  region.imageExtent.depth = 1;
  region.imageOffset = { 0, 0, 0 };

  // Send buffer image copy cmd.
  buffer.CopyBufferToImage(stagingBuffer.Handle(), mImage, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &region);

  // Transition 2: TRANSFER_DST_OPTIMAL -> SHADER_READ_ONLY_OPTIMAL so fragment
  // shaders can sample the uploaded data.
  imgBarrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
  imgBarrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
  imgBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
  imgBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;

  buffer.PipelineBarrier(
    VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
    0,
    0, nullptr,
    0, nullptr,
    1, &imgBarrier
  );

  buffer.End();

  // Submit to the graphics queue and wait, so the temporaries below are no
  // longer in use when they are released.
  VkCommandBuffer commandbuffers[] = { buffer.Handle() };

  VkSubmitInfo submit = { };
  submit.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
  submit.commandBufferCount = 1;
  submit.pCommandBuffers = commandbuffers;

  rhi->GraphicsSubmit(submit);
  rhi->GraphicsWaitIdle();

  buffer.Free();
  stagingBuffer.CleanUp();
}
很抱歉贴了这么多代码和信息,其中还有不少糟糕的设计决策。我的问题是:这是驱动程序的错误,还是我的代码中有不正确的地方,导致最终输出无法以正确的纹理映射显示?另外,我对这个问题的理解也不深,如果我的描述有说不通的地方,再次表示歉意。
使用 RenderDoc,我还获得了有关 nvidia 和 intel gpus 渲染管道中每个渲染通道的信息:
你可以在我的 github 中看到引擎的源代码,以获得渲染器的更大图片,它位于引擎目录中,它的实现在 this link
如果您正在寻找编译程序的简单 "works right out of the box" 版本,您可以在此处下载此 link: Recluse Zip
从那里解压并在Build64目录中找到Recluse.sln文件,然后编译并设置"Test"作为启动项目。或者您可以在 Release/Debug 输出目录中找到 exe。
好吧,我是个白痴。问题不在于渲染器的实现,而在于着色器。查看 HDRGamma.frag 着色器(HDR Pass 使用的着色器),在 Shader/Source 目录中:
// HDRGamma.frag interface as originally written. This is the layout that
// misbehaved on Nvidia; it is kept unchanged here for reference.
#version 430
#extension GL_ARB_separate_shader_objects : enable
#extension GL_ARB_shading_language_420pack : enable
// Final color written by this pass.
layout (location = 0) out vec4 fragColor;
// Per-fragment inputs from the previous shader stage.
in FRAG_IN {
vec2 position;
vec2 uv;
} frag_in;
layout (set = 0, binding = 0) uniform sampler2D sceneSurface;
layout (set = 0, binding = 1) uniform sampler2D bloomSurface;
// NOTE(review): under std140 rules (the default for uniform blocks when
// targeting Vulkan) every array element is rounded up to 16 bytes. The arrays
// below therefore do NOT pad the block to "4 floats + 4 ints"; the resulting
// offsets are shown per member. A CPU-side struct packed as 4 floats + 4 ints
// would place bloomEnabled at byte 16, not 48 -- confirm against the host code.
layout (set = 0, binding = 2) uniform HDR {
float gamma;        // offset 0
float exposure;     // offset 4
float pad[2];       // offset 16, stride 16 -> occupies bytes 16..47
int bloomEnabled;   // offset 48
int pad1[3];        // offset 64, stride 16 -> occupies bytes 64..111
} hdr;
可以看到,uniform 块 HDR 被填充成 4 个 float 加 4 个 int。我原以为这样就对齐了缓冲区,但事实并非如此——着色器无法正确读取传入的 bloomEnabled 就是证明。我仍然不确定为什么在 Intel GPU 上没有问题,但对 Nvidia 来说这种布局是不正确的。如果改成这样打包缓冲区:
// Corrected HDR uniform block: four scalar members and no arrays, so the
// members are tightly packed and the block is 16 bytes in total.
layout (set = 0, binding = 0) uniform sampler2D sceneSurface;
layout (set = 0, binding = 1) uniform sampler2D bloomSurface;
layout (set = 0, binding = 2) uniform HDR {
float gamma;        // offset 0
float exposure;     // offset 4
int bloomEnabled;   // offset 8
int pad1;           // offset 12, explicit padding up to 16 bytes
} hdr;
这样实现就正确了:GPU 仍然读取 16 个字节,着色器也能正常读取 bloomEnabled。事实证明,如果没有正确对齐缓冲区,Nvidia 会出现一些奇怪的行为,原因我仍不清楚;而 Intel 似乎有办法保证输出颜色附件的完整性。
不管怎样,现在一切似乎都很好。更新了源代码以查看工作实现。