使用时间线信号量时的 vkQueueSubmit 块
vkQueueSubmit blocks when using timeline semaphores
我需要在两个 GPU 批次之间 运行 CPU 上的一个函数。为此,我使用时间线信号量。据我所知,vkQueueSubmit 不会阻塞。但是,当我提交这些 GPU 批次时它会阻塞:
uint64_t host_wait = timeline;
uint64_t host_signal = ++timeline;
uint64_t wait0 = timeline;
uint64_t signal0 = ++timeline;
uint64_t wait1 = timeline;
uint64_t signal1 = ++timeline;
VkPipelineStageFlags wait_mask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
VkTimelineSemaphoreSubmitInfo sp_submit0 = {
.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
.waitSemaphoreValueCount = 1,
.pWaitSemaphoreValues = &wait0,
.signalSemaphoreValueCount = 1,
.pSignalSemaphoreValues = &signal0,
};
VkSubmitInfo submit0 = {
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
.pNext = &sp_submit0,
.waitSemaphoreCount = 1,
.pWaitSemaphores = &timeline_semaphore,
.pWaitDstStageMask = &wait_mask,
.signalSemaphoreCount = 1,
.pSignalSemaphores = &timeline_semaphore,
};
VkTimelineSemaphoreSubmitInfo sp_submit1 = {
.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
.waitSemaphoreValueCount = 1,
.pWaitSemaphoreValues = &wait1,
.signalSemaphoreValueCount = 1,
.pSignalSemaphoreValues = &signal1,
};
VkSubmitInfo submit1 = {
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
.pNext = &sp_submit1,
.waitSemaphoreCount = 1,
.pWaitSemaphores = &timeline_semaphore,
.pWaitDstStageMask = &wait_mask,
.commandBufferCount = 1,
.pCommandBuffers = &command_buffer,
.signalSemaphoreCount = 1,
.pSignalSemaphores = &timeline_semaphore,
};
VkSubmitInfo infos[2] = { submit0, submit1 };
vkQueueSubmit(queue, 2, infos, fence);
// here vkQueueSubmit blocks the thread
WaitSemaphore(timeline_semaphore, host_wait);
some_function();
SignalSemaphore(timeline_semaphore, host_signal);
没有return它会阻塞几秒钟,我认为这有点像死锁。在调试器中,我看到来自 vkQueueSubmit 的 SleepEx 函数调用:vk_icdGetInstanceProcAddrSG -> ... -> SleepEx.
但是vkQueueSubmit在这个样本中没有阻塞(合并批处理):
uint64_t host_wait = timeline;
uint64_t host_signal = ++timeline;
uint64_t wait1 = timeline;
uint64_t signal1 = ++timeline;
VkPipelineStageFlags wait_mask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
VkTimelineSemaphoreSubmitInfo sp_submit1 = {
.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
.waitSemaphoreValueCount = 1,
.pWaitSemaphoreValues = &wait1,
.signalSemaphoreValueCount = 1,
.pSignalSemaphoreValues = &signal1,
};
VkSubmitInfo submit1 = {
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
.pNext = &sp_submit1,
.waitSemaphoreCount = 1,
.pWaitSemaphores = &timeline_semaphore,
.pWaitDstStageMask = &wait_mask,
.commandBufferCount = 1,
.pCommandBuffers = &command_buffer,
.signalSemaphoreCount = 1,
.pSignalSemaphores = &timeline_semaphore,
};
VkSubmitInfo infos[1] = { submit1 };
vkQueueSubmit(queue, 1, infos, fence);
WaitSemaphore(timeline_semaphore, host_wait);
some_function();
SignalSemaphore(timeline_semaphore, host_signal);
为什么 vkQueueSubmit 在第一个代码示例中阻塞?这个问题的可能原因是什么?
我在 Windows 10 和 Radeon RX 570(驱动程序 20.4.2)上使用 Vulkan 1.2 (SDK 1.2.135)。
编辑:当我将命令缓冲区添加到 submit0
时,vkQueueSubmit 不会阻塞。这是驱动程序中的错误吗?
Doing something odd like submitting a batch with no work is far more likely to be the cause.
The spec doesn’t have performance requirements. The fact that something is legal does not make it a good idea. Broadly speaking, if there are two ways to do a thing, do it the obvious way. And sending an empty batch isn’t exactly obvious
https://community.khronos.org/t/vkqueuesubmit-blocks-when-using-timeline-semaphores/105704/2
我需要在两个 GPU 批次之间 运行 CPU 上的一个函数。为此,我使用时间线信号量。据我所知,vkQueueSubmit 不会阻塞。但是,当我提交这些 GPU 批次时它会阻塞:
uint64_t host_wait = timeline;
uint64_t host_signal = ++timeline;
uint64_t wait0 = timeline;
uint64_t signal0 = ++timeline;
uint64_t wait1 = timeline;
uint64_t signal1 = ++timeline;
VkPipelineStageFlags wait_mask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
VkTimelineSemaphoreSubmitInfo sp_submit0 = {
.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
.waitSemaphoreValueCount = 1,
.pWaitSemaphoreValues = &wait0,
.signalSemaphoreValueCount = 1,
.pSignalSemaphoreValues = &signal0,
};
VkSubmitInfo submit0 = {
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
.pNext = &sp_submit0,
.waitSemaphoreCount = 1,
.pWaitSemaphores = &timeline_semaphore,
.pWaitDstStageMask = &wait_mask,
.signalSemaphoreCount = 1,
.pSignalSemaphores = &timeline_semaphore,
};
VkTimelineSemaphoreSubmitInfo sp_submit1 = {
.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
.waitSemaphoreValueCount = 1,
.pWaitSemaphoreValues = &wait1,
.signalSemaphoreValueCount = 1,
.pSignalSemaphoreValues = &signal1,
};
VkSubmitInfo submit1 = {
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
.pNext = &sp_submit1,
.waitSemaphoreCount = 1,
.pWaitSemaphores = &timeline_semaphore,
.pWaitDstStageMask = &wait_mask,
.commandBufferCount = 1,
.pCommandBuffers = &command_buffer,
.signalSemaphoreCount = 1,
.pSignalSemaphores = &timeline_semaphore,
};
VkSubmitInfo infos[2] = { submit0, submit1 };
vkQueueSubmit(queue, 2, infos, fence);
// here vkQueueSubmit blocks the thread
WaitSemaphore(timeline_semaphore, host_wait);
some_function();
SignalSemaphore(timeline_semaphore, host_signal);
没有return它会阻塞几秒钟,我认为这有点像死锁。在调试器中,我看到来自 vkQueueSubmit 的 SleepEx 函数调用:vk_icdGetInstanceProcAddrSG -> ... -> SleepEx.
但是vkQueueSubmit在这个样本中没有阻塞(合并批处理):
uint64_t host_wait = timeline;
uint64_t host_signal = ++timeline;
uint64_t wait1 = timeline;
uint64_t signal1 = ++timeline;
VkPipelineStageFlags wait_mask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
VkTimelineSemaphoreSubmitInfo sp_submit1 = {
.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
.waitSemaphoreValueCount = 1,
.pWaitSemaphoreValues = &wait1,
.signalSemaphoreValueCount = 1,
.pSignalSemaphoreValues = &signal1,
};
VkSubmitInfo submit1 = {
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
.pNext = &sp_submit1,
.waitSemaphoreCount = 1,
.pWaitSemaphores = &timeline_semaphore,
.pWaitDstStageMask = &wait_mask,
.commandBufferCount = 1,
.pCommandBuffers = &command_buffer,
.signalSemaphoreCount = 1,
.pSignalSemaphores = &timeline_semaphore,
};
VkSubmitInfo infos[1] = { submit1 };
vkQueueSubmit(queue, 1, infos, fence);
WaitSemaphore(timeline_semaphore, host_wait);
some_function();
SignalSemaphore(timeline_semaphore, host_signal);
为什么 vkQueueSubmit 在第一个代码示例中阻塞?这个问题的可能原因是什么? 我在 Windows 10 和 Radeon RX 570(驱动程序 20.4.2)上使用 Vulkan 1.2 (SDK 1.2.135)。
编辑:当我将命令缓冲区添加到 submit0
时,vkQueueSubmit 不会阻塞。这是驱动程序中的错误吗?
Doing something odd like submitting a batch with no work is far more likely to be the cause.
The spec doesn’t have performance requirements. The fact that something is legal does not make it a good idea. Broadly speaking, if there are two ways to do a thing, do it the obvious way. And sending an empty batch isn’t exactly obvious
https://community.khronos.org/t/vkqueuesubmit-blocks-when-using-timeline-semaphores/105704/2