从内核直接写入 D3D 纹理
Direct write to D3D texture from kernel
我正在使用 NVIDIA CUDA 示例中的 NVDEC H.264 解码器。我注意到,帧一旦解码完成,就会先从 NV12 转换到在 CUDA 端分配的 BGRA 缓冲区,然后再将该缓冲区复制到 D3D BGRA 纹理。
我认为这种做法在内存使用上不够高效,因此想用下面这个内核把 NV12 帧直接转换并写入 D3D 纹理:
// Declaration of the NV12-to-BGRA conversion routine the post wants to reuse (the post calls it a kernel).
// Presumably dpNv12/nNv12Pitch are the source frame and its pitch, dpBgra/nBgraPitch the destination
// buffer and its pitch, nWidth/nHeight the frame size and iMatrix a color-matrix selector - TODO confirm.
void Nv12ToBgra32(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix)
因此,我创建了一个 D3D 纹理(BGRA,D3D11_USAGE_DEFAULT,D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS,D3D11_CPU_ACCESS_WRITE,1 级 mipmap),
然后在 CUDA 端将其注册以便写入:
// Register the D3D11 texture (textureResource) with CUDA; cuTexResource receives the graphics-resource handle.
ck(cuGraphicsD3D11RegisterResource(&cuTexResource, textureResource, CU_GRAPHICS_REGISTER_FLAGS_NONE));
...
// Write output: map the resource and fetch the CUDA array for array index 0, mip level 0.
CUarray retArray;
ck(cuGraphicsMapResources(1, &cuTexResource, 0));
ck(cuGraphicsSubResourceGetMappedArray(&retArray, cuTexResource, 0, 0));
/*
yuvFramePtr (NV12) is the uint8_t* of the decoded frame;
presumably it points to CUDA device memory - to be confirmed.

NOTE(review): retArray is a CUarray handle obtained from the mapped resource.
It is cast to uint8_t* below and handed to the conversion routine as its output
pointer; this is the call the post reports as crashing.
*/
Nv12ToBgra32(yuvFramePtr, w, (uint8_t*)retArray, 4 * w, w, h);
// Unmap so the texture is handed back once the write has been issued.
ck(cuGraphicsUnmapResources(1, &cuTexResource, 0));
内核一被调用,程序就崩溃了。可能是因为我误用了 CUarray。谁能解释一下,如何使用 cuGraphicsSubResourceGetMappedArray 的输出从 CUDA 内核写入纹理内存? (我只需要写入原始内存,不需要处理正确的钳位(clamp)、过滤(filtering)和数值缩放(value scaling)。)
好的,对于那些在“如何从 CUDA 内核写入 D3D11 纹理”这个问题上苦苦挣扎的人来说,方法如下:
使用 D3D11_BIND_UNORDERED_ACCESS 创建 D3D 纹理。
然后,注册资源:
// textureResource is the ID3D11Texture2D* of the D3D texture created with D3D11_BIND_UNORDERED_ACCESS.
CUgraphicsResource cuTexResource;
// Register with SURFACE_LDST rather than NONE: the texture is used as a write surface
// by the kernel, and this flag declares that the resource will be bound to a surface.
ck(cuGraphicsD3D11RegisterResource(&cuTexResource, textureResource, CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LDST));
// Optional: request write-discard mapping when the kernel overwrites the whole texture,
// so the previous contents do not have to be preserved across the map.
ck(cuGraphicsResourceSetMapFlags(cuTexResource, CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD));
创建并注册纹理后,我们就可以将其用作写入表面。
ck(cuGraphicsMapResources(1, &cuTexResource, 0));
//Get array for first mip-map
CUArray retArray;
ck(cuGraphicsSubResourceGetMappedArray(&retArray, cuTexResource, 0, 0));
//Create surface from texture
CUsurfObject surf;
CUDA_RESOURCE_DESC surfDesc{};
surfDesc.res.array.hArray = retArray;
surfDesc.resType = CU_RESOURCE_TYPE_ARRAY;
ck(cuSurfObjectCreate(&surf, &surfDesc));
/*
Kernel declaration is:
void Nv12ToBgra32Surf(uint8_t* dpNv12, int nNv12Pitch, cudaSurfaceObject_t surf, int nBgraPitch, int nWidth, int nHeight, int iMatrix)
Surface write:
surf2Dwrite<uint>(VALUE, surf, x * sizeof(uint), y);
For BGRA surface we are writing uint, X offset is in bytes,
so multiply it with byte-size of type.
Run kernel:
*/
Nv12ToBgra32Surf(yuvFramePtr, w, /*out*/surf, 4 * w, w, h);
ck(cuGraphicsUnmapResources(1, &cuTexResource, 0));
ck(cuSurfObjectDestroy(surf));
我正在使用 NVIDIA CUDA 示例中的 NVDEC H.264 解码器。我注意到,帧一旦解码完成,就会先从 NV12 转换到在 CUDA 端分配的 BGRA 缓冲区,然后再将该缓冲区复制到 D3D BGRA 纹理。
我认为这种做法在内存使用上不够高效,因此想用下面这个内核把 NV12 帧直接转换并写入 D3D 纹理:
// Declaration of the NV12-to-BGRA conversion routine the post wants to reuse (the post calls it a kernel).
// Presumably dpNv12/nNv12Pitch are the source frame and its pitch, dpBgra/nBgraPitch the destination
// buffer and its pitch, nWidth/nHeight the frame size and iMatrix a color-matrix selector - TODO confirm.
void Nv12ToBgra32(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight, int iMatrix)
因此,我创建了一个 D3D 纹理(BGRA,D3D11_USAGE_DEFAULT,D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS,D3D11_CPU_ACCESS_WRITE,1 级 mipmap), 然后在 CUDA 端将其注册以便写入:
// Register the D3D11 texture (textureResource) with CUDA; cuTexResource receives the graphics-resource handle.
ck(cuGraphicsD3D11RegisterResource(&cuTexResource, textureResource, CU_GRAPHICS_REGISTER_FLAGS_NONE));
...
// Write output: map the resource and fetch the CUDA array for array index 0, mip level 0.
CUarray retArray;
ck(cuGraphicsMapResources(1, &cuTexResource, 0));
ck(cuGraphicsSubResourceGetMappedArray(&retArray, cuTexResource, 0, 0));
/*
yuvFramePtr (NV12) is the uint8_t* of the decoded frame;
presumably it points to CUDA device memory - to be confirmed.

NOTE(review): retArray is a CUarray handle obtained from the mapped resource.
It is cast to uint8_t* below and handed to the conversion routine as its output
pointer; this is the call the post reports as crashing.
*/
Nv12ToBgra32(yuvFramePtr, w, (uint8_t*)retArray, 4 * w, w, h);
// Unmap so the texture is handed back once the write has been issued.
ck(cuGraphicsUnmapResources(1, &cuTexResource, 0));
内核一被调用,程序就崩溃了。可能是因为我误用了 CUarray。谁能解释一下,如何使用 cuGraphicsSubResourceGetMappedArray 的输出从 CUDA 内核写入纹理内存? (我只需要写入原始内存,不需要处理正确的钳位(clamp)、过滤(filtering)和数值缩放(value scaling)。)
好的,对于那些在“如何从 CUDA 内核写入 D3D11 纹理”这个问题上苦苦挣扎的人来说,方法如下:
使用 D3D11_BIND_UNORDERED_ACCESS 创建 D3D 纹理。 然后,注册资源:
// textureResource is the ID3D11Texture2D* of the D3D texture created with D3D11_BIND_UNORDERED_ACCESS.
CUgraphicsResource cuTexResource;
// Register with SURFACE_LDST rather than NONE: the texture is used as a write surface
// by the kernel, and this flag declares that the resource will be bound to a surface.
ck(cuGraphicsD3D11RegisterResource(&cuTexResource, textureResource, CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LDST));
// Optional: request write-discard mapping when the kernel overwrites the whole texture,
// so the previous contents do not have to be preserved across the map.
ck(cuGraphicsResourceSetMapFlags(cuTexResource, CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD));
创建并注册纹理后,我们就可以将其用作写入表面。
ck(cuGraphicsMapResources(1, &cuTexResource, 0));
//Get array for first mip-map
CUArray retArray;
ck(cuGraphicsSubResourceGetMappedArray(&retArray, cuTexResource, 0, 0));
//Create surface from texture
CUsurfObject surf;
CUDA_RESOURCE_DESC surfDesc{};
surfDesc.res.array.hArray = retArray;
surfDesc.resType = CU_RESOURCE_TYPE_ARRAY;
ck(cuSurfObjectCreate(&surf, &surfDesc));
/*
Kernel declaration is:
void Nv12ToBgra32Surf(uint8_t* dpNv12, int nNv12Pitch, cudaSurfaceObject_t surf, int nBgraPitch, int nWidth, int nHeight, int iMatrix)
Surface write:
surf2Dwrite<uint>(VALUE, surf, x * sizeof(uint), y);
For BGRA surface we are writing uint, X offset is in bytes,
so multiply it with byte-size of type.
Run kernel:
*/
Nv12ToBgra32Surf(yuvFramePtr, w, /*out*/surf, 4 * w, w, h);
ck(cuGraphicsUnmapResources(1, &cuTexResource, 0));
ck(cuSurfObjectDestroy(surf));