10MB 图像使用 500MB RAM
10MB Image Using 500MB RAM
我的 Vulkan 应用程序存在内存问题。加载 10mb 的 PNG 图像使用了将近 500mb 的内存。
TextureObject* createTextureImage(const char* File) {
auto Tex = Textures.emplace_back(new TextureObject(_Driver));
//decode
unsigned error = lodepng::decode(Tex->Pixels, Tex->Width, Tex->Height, File);
//if there's an error, display it
if (error) printf("PNG Decoder error: (%i) %s", error, lodepng_error_text(error));
Tex->Empty = false;
VkDeviceSize imageSize = Tex->Width * Tex->Height * 4;
//
// Image Staging Buffer
VkBufferCreateInfo stagingBufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
stagingBufferInfo.size = imageSize;
stagingBufferInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
VmaAllocationCreateInfo allocInfo = {};
allocInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
allocInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
VkBuffer stagingImageBuffer = VK_NULL_HANDLE;
VmaAllocation stagingImageBufferAlloc = VK_NULL_HANDLE;
vmaCreateBuffer(_Driver->allocator, &stagingBufferInfo, &allocInfo, &stagingImageBuffer, &stagingImageBufferAlloc, nullptr);
memcpy(stagingImageBufferAlloc->GetMappedData(), Tex->Pixels.data(), static_cast<size_t>(imageSize));
Tex->Pixels.clear();
VkImageCreateInfo imageInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
imageInfo.imageType = VK_IMAGE_TYPE_2D;
imageInfo.extent.width = static_cast<uint32_t>(Tex->Width);
imageInfo.extent.height = static_cast<uint32_t>(Tex->Height);
imageInfo.extent.depth = 1;
imageInfo.mipLevels = 1;
imageInfo.arrayLayers = 1;
imageInfo.format = VK_FORMAT_B8G8R8A8_SRGB;
imageInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
imageInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
imageInfo.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
imageInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
imageInfo.samples = VK_SAMPLE_COUNT_1_BIT;
allocInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
VmaAllocationInfo imageBufferAllocInfo = {};
vmaCreateImage(_Driver->allocator, &imageInfo, &allocInfo, &Tex->Image, &Tex->Allocation, nullptr);
//
// CPU->GPU Copy
VkCommandBuffer commandBuffer = _Driver->_SceneGraph->beginSingleTimeCommands();
VkImageMemoryBarrier imgMemBarrier = { VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER };
imgMemBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
imgMemBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
imgMemBarrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
imgMemBarrier.subresourceRange.baseMipLevel = 0;
imgMemBarrier.subresourceRange.levelCount = 1;
imgMemBarrier.subresourceRange.baseArrayLayer = 0;
imgMemBarrier.subresourceRange.layerCount = 1;
imgMemBarrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
imgMemBarrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
imgMemBarrier.image = Tex->Image;
imgMemBarrier.srcAccessMask = 0;
imgMemBarrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
vkCmdPipelineBarrier(
commandBuffer,
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT,
0,
0, nullptr,
0, nullptr,
1, &imgMemBarrier);
VkBufferImageCopy region = {};
region.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
region.imageSubresource.layerCount = 1;
region.imageExtent.width = static_cast<uint32_t>(Tex->Width);
region.imageExtent.height = static_cast<uint32_t>(Tex->Height);
region.imageExtent.depth = 1;
vkCmdCopyBufferToImage(commandBuffer, stagingImageBuffer, Tex->Image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ®ion);
imgMemBarrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
imgMemBarrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
imgMemBarrier.image = Tex->Image;
imgMemBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
imgMemBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
vkCmdPipelineBarrier(
commandBuffer,
VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
0,
0, nullptr,
0, nullptr,
1, &imgMemBarrier);
_Driver->_SceneGraph->endSingleTimeCommands(commandBuffer);
vmaDestroyBuffer(_Driver->allocator, stagingImageBuffer, stagingImageBufferAlloc);
VkImageViewCreateInfo textureImageViewInfo = { VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO };
textureImageViewInfo.image = Tex->Image;
textureImageViewInfo.viewType = VK_IMAGE_VIEW_TYPE_2D;
textureImageViewInfo.format = VK_FORMAT_B8G8R8A8_SRGB;
textureImageViewInfo.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
textureImageViewInfo.subresourceRange.baseMipLevel = 0;
textureImageViewInfo.subresourceRange.levelCount = 1;
textureImageViewInfo.subresourceRange.baseArrayLayer = 0;
textureImageViewInfo.subresourceRange.layerCount = 1;
vkCreateImageView(_Driver->device, &textureImageViewInfo, nullptr, &Tex->ImageView);
VkSamplerCreateInfo samplerInfo = { VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO };
samplerInfo.magFilter = VK_FILTER_LINEAR;
samplerInfo.minFilter = VK_FILTER_LINEAR;
samplerInfo.addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT;
samplerInfo.addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT;
samplerInfo.addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT;
samplerInfo.anisotropyEnable = VK_TRUE;
samplerInfo.maxAnisotropy = 16;
samplerInfo.borderColor = VK_BORDER_COLOR_INT_OPAQUE_BLACK;
samplerInfo.unnormalizedCoordinates = VK_FALSE;
samplerInfo.compareEnable = VK_FALSE;
samplerInfo.compareOp = VK_COMPARE_OP_ALWAYS;
samplerInfo.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR;
samplerInfo.mipLodBias = 0.0f;
samplerInfo.minLod = 0.0f;
samplerInfo.maxLod = 0.0f;
if (vkCreateSampler(_Driver->device, &samplerInfo, nullptr, &Tex->Sampler) != VK_SUCCESS) {
#ifdef _DEBUG
throw std::runtime_error("failed to create texture sampler!");
#endif
}
return Tex;
}
我已经将分配范围缩小到发生在这个函数中。只需将正在加载的图像从 10mb png 交换到 1kb png 就可以大大减少此函数调用期间的内存消耗。
我是不是做错了什么导致分配了大量内存却没有释放?
在每一行上放置断点显示大量分配发生在调用之后:
unsigned error = lodepng::decode(Tex->Pixels, Tex->Width, Tex->Height, File);
加载一个 10mb 的 png 文件如何使用超过 500mb 来加载?
lodepng::decode
在Tex
里面填充下面3个变量
unsigned int Width = 0;
unsigned int Height = 0;
std::vector<unsigned char> Pixels = {};
即使是 1kb 的 png 也需要大约 5mb 的加载量,这看起来非常高。
我也尝试过使用 stb_image 加载 png 文件,结果相同。
EDIT2:10mb 的 png 是 4096x4096,1kb 的 png 是 16x16。
EDIT2:经过一些内存分析后,在检查其中一个内存峰值后分配了占 563,000,000 字节的 58,000 个 void 实例,其中超过 500,000,000 个位于我调用 .clear()
的向量中。我不确定如何真正解除分配这个问题,或者它是否是一个大问题?
How can loading a 10mb png file use upwards of 500mb to load?
好吧,它是 10MB 的压缩数据。你扔在这里的所有缓冲区都是解压后的,所以每个 64MB ...
一个可能的泄漏是 vector.clear()
不能保证重新分配,所以当您在这里调用 clear()
释放对象时,您可能仍然坐在 64MB 的后备存储上,这是直到矢量被销毁才释放。
我的 Vulkan 应用程序存在内存问题。加载 10mb 的 PNG 图像使用了将近 500mb 的内存。
TextureObject* createTextureImage(const char* File) {
auto Tex = Textures.emplace_back(new TextureObject(_Driver));
//decode
unsigned error = lodepng::decode(Tex->Pixels, Tex->Width, Tex->Height, File);
//if there's an error, display it
if (error) printf("PNG Decoder error: (%i) %s", error, lodepng_error_text(error));
Tex->Empty = false;
VkDeviceSize imageSize = Tex->Width * Tex->Height * 4;
//
// Image Staging Buffer
VkBufferCreateInfo stagingBufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
stagingBufferInfo.size = imageSize;
stagingBufferInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
VmaAllocationCreateInfo allocInfo = {};
allocInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
allocInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
VkBuffer stagingImageBuffer = VK_NULL_HANDLE;
VmaAllocation stagingImageBufferAlloc = VK_NULL_HANDLE;
vmaCreateBuffer(_Driver->allocator, &stagingBufferInfo, &allocInfo, &stagingImageBuffer, &stagingImageBufferAlloc, nullptr);
memcpy(stagingImageBufferAlloc->GetMappedData(), Tex->Pixels.data(), static_cast<size_t>(imageSize));
Tex->Pixels.clear();
VkImageCreateInfo imageInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
imageInfo.imageType = VK_IMAGE_TYPE_2D;
imageInfo.extent.width = static_cast<uint32_t>(Tex->Width);
imageInfo.extent.height = static_cast<uint32_t>(Tex->Height);
imageInfo.extent.depth = 1;
imageInfo.mipLevels = 1;
imageInfo.arrayLayers = 1;
imageInfo.format = VK_FORMAT_B8G8R8A8_SRGB;
imageInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
imageInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
imageInfo.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
imageInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
imageInfo.samples = VK_SAMPLE_COUNT_1_BIT;
allocInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
VmaAllocationInfo imageBufferAllocInfo = {};
vmaCreateImage(_Driver->allocator, &imageInfo, &allocInfo, &Tex->Image, &Tex->Allocation, nullptr);
//
// CPU->GPU Copy
VkCommandBuffer commandBuffer = _Driver->_SceneGraph->beginSingleTimeCommands();
VkImageMemoryBarrier imgMemBarrier = { VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER };
imgMemBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
imgMemBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
imgMemBarrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
imgMemBarrier.subresourceRange.baseMipLevel = 0;
imgMemBarrier.subresourceRange.levelCount = 1;
imgMemBarrier.subresourceRange.baseArrayLayer = 0;
imgMemBarrier.subresourceRange.layerCount = 1;
imgMemBarrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
imgMemBarrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
imgMemBarrier.image = Tex->Image;
imgMemBarrier.srcAccessMask = 0;
imgMemBarrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
vkCmdPipelineBarrier(
commandBuffer,
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT,
0,
0, nullptr,
0, nullptr,
1, &imgMemBarrier);
VkBufferImageCopy region = {};
region.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
region.imageSubresource.layerCount = 1;
region.imageExtent.width = static_cast<uint32_t>(Tex->Width);
region.imageExtent.height = static_cast<uint32_t>(Tex->Height);
region.imageExtent.depth = 1;
vkCmdCopyBufferToImage(commandBuffer, stagingImageBuffer, Tex->Image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ®ion);
imgMemBarrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
imgMemBarrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
imgMemBarrier.image = Tex->Image;
imgMemBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
imgMemBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
vkCmdPipelineBarrier(
commandBuffer,
VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
0,
0, nullptr,
0, nullptr,
1, &imgMemBarrier);
_Driver->_SceneGraph->endSingleTimeCommands(commandBuffer);
vmaDestroyBuffer(_Driver->allocator, stagingImageBuffer, stagingImageBufferAlloc);
VkImageViewCreateInfo textureImageViewInfo = { VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO };
textureImageViewInfo.image = Tex->Image;
textureImageViewInfo.viewType = VK_IMAGE_VIEW_TYPE_2D;
textureImageViewInfo.format = VK_FORMAT_B8G8R8A8_SRGB;
textureImageViewInfo.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
textureImageViewInfo.subresourceRange.baseMipLevel = 0;
textureImageViewInfo.subresourceRange.levelCount = 1;
textureImageViewInfo.subresourceRange.baseArrayLayer = 0;
textureImageViewInfo.subresourceRange.layerCount = 1;
vkCreateImageView(_Driver->device, &textureImageViewInfo, nullptr, &Tex->ImageView);
VkSamplerCreateInfo samplerInfo = { VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO };
samplerInfo.magFilter = VK_FILTER_LINEAR;
samplerInfo.minFilter = VK_FILTER_LINEAR;
samplerInfo.addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT;
samplerInfo.addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT;
samplerInfo.addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT;
samplerInfo.anisotropyEnable = VK_TRUE;
samplerInfo.maxAnisotropy = 16;
samplerInfo.borderColor = VK_BORDER_COLOR_INT_OPAQUE_BLACK;
samplerInfo.unnormalizedCoordinates = VK_FALSE;
samplerInfo.compareEnable = VK_FALSE;
samplerInfo.compareOp = VK_COMPARE_OP_ALWAYS;
samplerInfo.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR;
samplerInfo.mipLodBias = 0.0f;
samplerInfo.minLod = 0.0f;
samplerInfo.maxLod = 0.0f;
if (vkCreateSampler(_Driver->device, &samplerInfo, nullptr, &Tex->Sampler) != VK_SUCCESS) {
#ifdef _DEBUG
throw std::runtime_error("failed to create texture sampler!");
#endif
}
return Tex;
}
我已经将分配范围缩小到发生在这个函数中。只需将正在加载的图像从 10mb png 交换到 1kb png 就可以大大减少此函数调用期间的内存消耗。
我是不是做错了什么导致分配了大量内存却没有释放?
在每一行上放置断点显示大量分配发生在调用之后:
unsigned error = lodepng::decode(Tex->Pixels, Tex->Width, Tex->Height, File);
加载一个 10mb 的 png 文件如何使用超过 500mb 来加载?
lodepng::decode
在Tex
unsigned int Width = 0;
unsigned int Height = 0;
std::vector<unsigned char> Pixels = {};
即使是 1kb 的 png 也需要大约 5mb 的加载量,这看起来非常高。
我也尝试过使用 stb_image 加载 png 文件,结果相同。
EDIT2:10mb 的 png 是 4096x4096,1kb 的 png 是 16x16。
EDIT2:经过一些内存分析后,在检查其中一个内存峰值后分配了占 563,000,000 字节的 58,000 个 void 实例,其中超过 500,000,000 个位于我调用 .clear()
的向量中。我不确定如何真正解除分配这个问题,或者它是否是一个大问题?
How can loading a 10mb png file use upwards of 500mb to load?
好吧,它是 10MB 的压缩数据。你扔在这里的所有缓冲区都是解压后的,所以每个 64MB ...
一个可能的泄漏是 vector.clear()
不能保证重新分配,所以当您在这里调用 clear()
释放对象时,您可能仍然坐在 64MB 的后备存储上,这是直到矢量被销毁才释放。