在 CUDA 回调中将异步拷贝加入队列——不允许吗?
Enqueueing an async copy from a CUDA callback - not permitted?
这个程序:
#include <string>
#include <stdexcept>
// Pair of buffer pointers handed to the stream callback through its opaque
// `void*` user-data argument.
struct buffers_t {
    // Set by cudaMallocHost in main(); null until an allocation succeeds.
    void* host_buffer = nullptr;
    // Set by cudaMalloc in main(); null until an allocation succeeds.
    void* device_buffer = nullptr;
};
// Checks the CUDA runtime's last-error state and raises if it is not clean.
//
// message: context prefix for the exception text; the CUDA error description
//          is appended after ": ".
// Throws std::runtime_error when cudaGetLastError() reports anything other
// than cudaSuccess. Note that cudaGetLastError() also clears the stored error.
void ensure_no_error(std::string message) {
    const cudaError_t last_error = cudaGetLastError();
    if (last_error == cudaSuccess) {
        return;  // nothing pending
    }
    throw std::runtime_error(message + ": " + cudaGetErrorString(last_error));
}
// Stream callback registered via cudaStreamAddCallback.
//
// stream: the stream the callback was enqueued on; the copy is issued on it.
// status: status passed in by the runtime (unused here).
// args:   user data; must point to a buffers_t.
//
// NOTE: this issues a CUDA API call from inside a stream callback, which is
// exactly the behavior this example investigates. The stream-callback
// documentation states that a callback must not make CUDA API calls.
void my_callback(cudaStream_t stream, cudaError_t status, void* args) {
    buffers_t* const pair = static_cast<buffers_t*>(args);
    void* const destination = pair->host_buffer;
    void* const source = pair->device_buffer;
    // Copy a single byte; direction is inferred from the pointers.
    cudaMemcpyAsync(destination, source, 1, cudaMemcpyDefault, stream);
    ensure_no_error("after cudaMemcpyAsync");
}
// Minimal reproduction driver: creates a non-blocking stream, allocates one
// byte of pinned host memory and one byte of device memory, enqueues
// my_callback on the stream, and waits for the stream to drain.
//
// Every setup call is checked individually so that a failure is reported at
// the call that caused it rather than at a later, unrelated check. Resources
// are released once the stream has been synchronized.
int main() {
    cudaStream_t stream;
    cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking);
    ensure_no_error("after stream creation");

    buffers_t buffers;
    cudaMallocHost(&buffers.host_buffer, 1);
    ensure_no_error("after cudaMallocHost");
    cudaMalloc(&buffers.device_buffer, 1);
    ensure_no_error("after cudaMalloc");

    // `buffers` lives on this stack frame; the synchronize below keeps it
    // alive until the callback has run.
    cudaStreamAddCallback(stream, my_callback, &buffers, 0);
    ensure_no_error("after enqueue callback");
    cudaStreamSynchronize(stream);
    ensure_no_error("after sync");

    // Release everything acquired above.
    cudaFree(buffers.device_buffer);
    cudaFreeHost(buffers.host_buffer);
    cudaStreamDestroy(stream);
    ensure_no_error("after cleanup");
    return 0;
}
输出:
terminate called after throwing an instance of 'std::runtime_error'
what(): after cudaMemcpyAsync: operation not permitted
Aborted
这有点奇怪,因为 cudaMemcpyAsync
的 API 参考没有将 cudaErrorNotPermitted
列为潜在错误之一。从回调中调度异步拷贝真的有问题吗?
注意:我的机器有 GTX 650 Ti (CC 3.0),CUDA 9.0,Linux内核 4.8.0,驱动程序 384.59。
Is there really a problem with scheduling an async copy from a callback?
摘自流回调(stream callback)的文档:
A callback must not make CUDA API calls (directly or indirectly), as it might end up waiting on itself if it makes such a call leading to a deadlock.
这个程序:
#include <string>
#include <stdexcept>
// Pair of buffer pointers handed to the stream callback through its opaque
// `void*` user-data argument.
struct buffers_t {
    // Set by cudaMallocHost in main(); null until an allocation succeeds.
    void* host_buffer = nullptr;
    // Set by cudaMalloc in main(); null until an allocation succeeds.
    void* device_buffer = nullptr;
};
// Checks the CUDA runtime's last-error state and raises if it is not clean.
//
// message: context prefix for the exception text; the CUDA error description
//          is appended after ": ".
// Throws std::runtime_error when cudaGetLastError() reports anything other
// than cudaSuccess. Note that cudaGetLastError() also clears the stored error.
void ensure_no_error(std::string message) {
    const cudaError_t last_error = cudaGetLastError();
    if (last_error == cudaSuccess) {
        return;  // nothing pending
    }
    throw std::runtime_error(message + ": " + cudaGetErrorString(last_error));
}
// Stream callback registered via cudaStreamAddCallback.
//
// stream: the stream the callback was enqueued on; the copy is issued on it.
// status: status passed in by the runtime (unused here).
// args:   user data; must point to a buffers_t.
//
// NOTE: this issues a CUDA API call from inside a stream callback, which is
// exactly the behavior this example investigates. The stream-callback
// documentation states that a callback must not make CUDA API calls.
void my_callback(cudaStream_t stream, cudaError_t status, void* args) {
    buffers_t* const pair = static_cast<buffers_t*>(args);
    void* const destination = pair->host_buffer;
    void* const source = pair->device_buffer;
    // Copy a single byte; direction is inferred from the pointers.
    cudaMemcpyAsync(destination, source, 1, cudaMemcpyDefault, stream);
    ensure_no_error("after cudaMemcpyAsync");
}
// Minimal reproduction driver: creates a non-blocking stream, allocates one
// byte of pinned host memory and one byte of device memory, enqueues
// my_callback on the stream, and waits for the stream to drain.
//
// Every setup call is checked individually so that a failure is reported at
// the call that caused it rather than at a later, unrelated check. Resources
// are released once the stream has been synchronized.
int main() {
    cudaStream_t stream;
    cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking);
    ensure_no_error("after stream creation");

    buffers_t buffers;
    cudaMallocHost(&buffers.host_buffer, 1);
    ensure_no_error("after cudaMallocHost");
    cudaMalloc(&buffers.device_buffer, 1);
    ensure_no_error("after cudaMalloc");

    // `buffers` lives on this stack frame; the synchronize below keeps it
    // alive until the callback has run.
    cudaStreamAddCallback(stream, my_callback, &buffers, 0);
    ensure_no_error("after enqueue callback");
    cudaStreamSynchronize(stream);
    ensure_no_error("after sync");

    // Release everything acquired above.
    cudaFree(buffers.device_buffer);
    cudaFreeHost(buffers.host_buffer);
    cudaStreamDestroy(stream);
    ensure_no_error("after cleanup");
    return 0;
}
输出:
terminate called after throwing an instance of 'std::runtime_error'
what(): after cudaMemcpyAsync: operation not permitted
Aborted
这有点奇怪,因为 cudaMemcpyAsync
的 API 参考没有将 cudaErrorNotPermitted
列为潜在错误之一。从回调中调度异步拷贝真的有问题吗?
注意:我的机器有 GTX 650 Ti (CC 3.0),CUDA 9.0,Linux内核 4.8.0,驱动程序 384.59。
Is there really a problem with scheduling an async copy from a callback?
摘自流回调(stream callback)的文档:
A callback must not make CUDA API calls (directly or indirectly), as it might end up waiting on itself if it makes such a call leading to a deadlock.