从 cudaBindTexture2D 读取
Read from cudaBindTexture2D
我一直在尝试将二维数组存储在纹理内存中，并通过 cudaBindTexture2D 绑定后从中读取，
但是返回的值为 0。我不确定这是否是对 cudaBindTexture2D 和 tex2D() 的正确使用。
我编写了一个非常简单的代码来试用它:
#include <cuda.h>
#include <stdio.h>
#include <stdlib.h>
// File-scope 2D texture reference with uint elements, declared with
// cudaReadModeElementType. The kernel reads it through tex2D() and the host
// binds memory to it with cudaBindTexture2D().
texture<uint, cudaTextureType2D, cudaReadModeElementType> tex;
// Kernel: fetches the texel at coordinates (0, 0) from the file-scope texture
// reference `tex` and stores it in the int that x points to.
//   x - destination for the fetched value (passed a cudaMalloc'ed int by main).
// Every thread writes the same location; main launches it as <<<1,1>>>.
__global__ void texture2DTest(int *x)
{
    const float col = 0;
    const float row = 0;
    x[0] = tex2D(tex, col, row);
}
// Zero-fills a host table of 10 rows by 9 columns, then stores 12 in
// element [0][0].
//   textureTable - table with at least 10 rows of 9 ints; modified in place.
void initTable(int textureTable[][9])
{
    for (int row = 0; row < 10; ++row) {
        int *cells = textureTable[row];
        for (int col = 0; col < 9; ++col) {
            cells[col] = 0;
        }
    }
    (*textureTable)[0] = 12;
}
// Test driver from the question: builds a 10x9 host table, tries to bind it to
// the texture reference `tex`, launches texture2DTest to read element (0, 0),
// and prints the int copied back from the device.
// NOTE(review): this version is kept as posted; the inline notes mark the
// calls that are used incorrectly. No CUDA return code is checked here.
int main (int argc, char ** argv){
// Host-side table; initTable() zeroes it and sets [0][0] to 12.
int textureTable[10][9];
int *d_x;
int x=2;
size_t pitch;
initTable(textureTable);
// Device int that receives the kernel's result, seeded with x (2).
cudaMalloc(&d_x, sizeof(int));
cudaMemcpy(d_x, &x, sizeof(int), cudaMemcpyHostToDevice);
// NOTE(review): the host array itself is cast to void** and used as the output
// pointer, so there is no separate device pointer and the host table's storage
// is overwritten. The width argument is given as 9, but cudaMallocPitch takes
// the width in bytes (9 * sizeof(int) for this table).
cudaMallocPitch( (void**)textureTable,&pitch, 9, 10);
cudaChannelFormatDesc desc = cudaCreateChannelDesc<uint>();
// NOTE(review): the texture is bound to the host array; nothing in this
// function copies the table contents into device memory.
cudaBindTexture2D(NULL, tex, textureTable, desc, 9, 10, pitch) ;
texture2DTest<<<1,1>>>(d_x);
// Wait for the kernel before reading the result back.
cudaThreadSynchronize();
cudaMemcpy(&x,d_x, sizeof(int), cudaMemcpyDeviceToHost);
printf(" \n %d \n",x);
cudaUnbindTexture(tex);
// NOTE(review): d_x is never released with cudaFree before returning.
return 0;
}
谢谢。
提供的代码中有不少问题。
使用 cudaMallocPitch 进行的设备内存分配完全是错误的。您试图把设备内存分配到主机上已经分配好的二维数组上。
这样做会导致内存损坏和未定义的行为。设备内存分配需要使用一个单独的指针变量，分配完成后再把数据从主机复制到设备。
cudaMallocPitch 的第三个参数是以字节为单位的内存宽度，而不是元素个数。
纹理只能绑定到设备内存，因此 cudaBindTexture 需要以设备内存指针作为输入。
解决上述所有问题后，您最终的 main 函数将如下所示：
// Aborts with a readable message if a CUDA runtime call did not succeed.
static void checkCuda(cudaError_t status, const char *what)
{
    if (status != cudaSuccess) {
        fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

// Copies a 10x9 host table into pitch-linear device memory, binds that memory
// to the 2D texture reference `tex`, launches texture2DTest to fetch texel
// (0, 0), and prints the value copied back from the device.
// Returns 0 on success; exits with EXIT_FAILURE if any CUDA call fails.
int main (int argc, char ** argv)
{
    // Table dimensions; COLS must match the column count initTable() expects.
    enum { ROWS = 10, COLS = 9 };
    const size_t rowBytes = COLS * sizeof(int);

    int textureTable[ROWS][COLS];
    int *d_x = NULL;
    int x = 2;
    size_t pitch = 0;
    initTable(textureTable);

    // Device int that receives the kernel's result, seeded with x.
    checkCuda(cudaMalloc(&d_x, sizeof(int)), "cudaMalloc(d_x)");
    checkCuda(cudaMemcpy(d_x, &x, sizeof(int), cudaMemcpyHostToDevice),
              "cudaMemcpy(host -> d_x)");

    int* d_textureTable = NULL; //Device texture table
    //Allocate pitch linear memory to device texture table (width is in bytes)
    checkCuda(cudaMallocPitch((void**)&d_textureTable, &pitch, rowBytes, ROWS),
              "cudaMallocPitch");
    //Use Memcpy2D as the pitch of host and device memory may be different
    checkCuda(cudaMemcpy2D(d_textureTable, pitch, textureTable, rowBytes,
                           rowBytes, ROWS, cudaMemcpyHostToDevice),
              "cudaMemcpy2D");

    cudaChannelFormatDesc desc = cudaCreateChannelDesc<uint>();
    // Width and height are in texels; pitch is the byte pitch of the allocation.
    checkCuda(cudaBindTexture2D(NULL, tex, d_textureTable, desc, COLS, ROWS, pitch),
              "cudaBindTexture2D");

    texture2DTest<<<1,1>>>(d_x);
    // A launch does not return a status: query it explicitly, then synchronize
    // so that errors raised while the kernel runs are reported as well.
    checkCuda(cudaGetLastError(), "texture2DTest launch");
    checkCuda(cudaDeviceSynchronize(), "cudaDeviceSynchronize");

    checkCuda(cudaMemcpy(&x, d_x, sizeof(int), cudaMemcpyDeviceToHost),
              "cudaMemcpy(d_x -> host)");
    printf(" \n %d \n",x);

    checkCuda(cudaUnbindTexture(tex), "cudaUnbindTexture");
    //Don't forget to free the allocated memory
    checkCuda(cudaFree(d_textureTable), "cudaFree(d_textureTable)");
    checkCuda(cudaFree(d_x), "cudaFree(d_x)");
    return 0;
}
我一直在尝试将二维数组存储在纹理内存中，并通过 cudaBindTexture2D 绑定后从中读取，但是返回的值为 0。我不确定这是否是对 cudaBindTexture2D 和 tex2D() 的正确使用。
我编写了一段非常简单的代码来试用它：
#include <cuda.h>
#include <stdio.h>
#include <stdlib.h>
// File-scope 2D texture reference with uint elements, declared with
// cudaReadModeElementType. The kernel reads it through tex2D() and the host
// binds memory to it with cudaBindTexture2D().
texture<uint, cudaTextureType2D, cudaReadModeElementType> tex;
// Kernel: fetches the texel at coordinates (0, 0) from the file-scope texture
// reference `tex` and stores it in the int that x points to.
//   x - destination for the fetched value (passed a cudaMalloc'ed int by main).
// Every thread writes the same location; main launches it as <<<1,1>>>.
__global__ void texture2DTest(int *x)
{
    const float col = 0;
    const float row = 0;
    x[0] = tex2D(tex, col, row);
}
// Zero-fills a host table of 10 rows by 9 columns, then stores 12 in
// element [0][0].
//   textureTable - table with at least 10 rows of 9 ints; modified in place.
void initTable(int textureTable[][9])
{
    for (int row = 0; row < 10; ++row) {
        int *cells = textureTable[row];
        for (int col = 0; col < 9; ++col) {
            cells[col] = 0;
        }
    }
    (*textureTable)[0] = 12;
}
// Test driver from the question: builds a 10x9 host table, tries to bind it to
// the texture reference `tex`, launches texture2DTest to read element (0, 0),
// and prints the int copied back from the device.
// NOTE(review): this version is kept as posted; the inline notes mark the
// calls that are used incorrectly. No CUDA return code is checked here.
int main (int argc, char ** argv){
// Host-side table; initTable() zeroes it and sets [0][0] to 12.
int textureTable[10][9];
int *d_x;
int x=2;
size_t pitch;
initTable(textureTable);
// Device int that receives the kernel's result, seeded with x (2).
cudaMalloc(&d_x, sizeof(int));
cudaMemcpy(d_x, &x, sizeof(int), cudaMemcpyHostToDevice);
// NOTE(review): the host array itself is cast to void** and used as the output
// pointer, so there is no separate device pointer and the host table's storage
// is overwritten. The width argument is given as 9, but cudaMallocPitch takes
// the width in bytes (9 * sizeof(int) for this table).
cudaMallocPitch( (void**)textureTable,&pitch, 9, 10);
cudaChannelFormatDesc desc = cudaCreateChannelDesc<uint>();
// NOTE(review): the texture is bound to the host array; nothing in this
// function copies the table contents into device memory.
cudaBindTexture2D(NULL, tex, textureTable, desc, 9, 10, pitch) ;
texture2DTest<<<1,1>>>(d_x);
// Wait for the kernel before reading the result back.
cudaThreadSynchronize();
cudaMemcpy(&x,d_x, sizeof(int), cudaMemcpyDeviceToHost);
printf(" \n %d \n",x);
cudaUnbindTexture(tex);
// NOTE(review): d_x is never released with cudaFree before returning.
return 0;
}
谢谢。
提供的代码中有不少问题。
使用 cudaMallocPitch 进行的设备内存分配完全是错误的。您试图把设备内存分配到主机上已经分配好的二维数组上。这样做会导致内存损坏和未定义的行为。设备内存分配需要使用一个单独的指针变量，分配完成后再把数据从主机复制到设备。
cudaMallocPitch 的第三个参数是以字节为单位的内存宽度，而不是元素个数。
纹理只能绑定到设备内存，因此 cudaBindTexture 需要以设备内存指针作为输入。
解决上述所有问题后，您最终的 main 函数将如下所示：
// Aborts with a readable message if a CUDA runtime call did not succeed.
static void checkCuda(cudaError_t status, const char *what)
{
    if (status != cudaSuccess) {
        fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

// Copies a 10x9 host table into pitch-linear device memory, binds that memory
// to the 2D texture reference `tex`, launches texture2DTest to fetch texel
// (0, 0), and prints the value copied back from the device.
// Returns 0 on success; exits with EXIT_FAILURE if any CUDA call fails.
int main (int argc, char ** argv)
{
    // Table dimensions; COLS must match the column count initTable() expects.
    enum { ROWS = 10, COLS = 9 };
    const size_t rowBytes = COLS * sizeof(int);

    int textureTable[ROWS][COLS];
    int *d_x = NULL;
    int x = 2;
    size_t pitch = 0;
    initTable(textureTable);

    // Device int that receives the kernel's result, seeded with x.
    checkCuda(cudaMalloc(&d_x, sizeof(int)), "cudaMalloc(d_x)");
    checkCuda(cudaMemcpy(d_x, &x, sizeof(int), cudaMemcpyHostToDevice),
              "cudaMemcpy(host -> d_x)");

    int* d_textureTable = NULL; //Device texture table
    //Allocate pitch linear memory to device texture table (width is in bytes)
    checkCuda(cudaMallocPitch((void**)&d_textureTable, &pitch, rowBytes, ROWS),
              "cudaMallocPitch");
    //Use Memcpy2D as the pitch of host and device memory may be different
    checkCuda(cudaMemcpy2D(d_textureTable, pitch, textureTable, rowBytes,
                           rowBytes, ROWS, cudaMemcpyHostToDevice),
              "cudaMemcpy2D");

    cudaChannelFormatDesc desc = cudaCreateChannelDesc<uint>();
    // Width and height are in texels; pitch is the byte pitch of the allocation.
    checkCuda(cudaBindTexture2D(NULL, tex, d_textureTable, desc, COLS, ROWS, pitch),
              "cudaBindTexture2D");

    texture2DTest<<<1,1>>>(d_x);
    // A launch does not return a status: query it explicitly, then synchronize
    // so that errors raised while the kernel runs are reported as well.
    checkCuda(cudaGetLastError(), "texture2DTest launch");
    checkCuda(cudaDeviceSynchronize(), "cudaDeviceSynchronize");

    checkCuda(cudaMemcpy(&x, d_x, sizeof(int), cudaMemcpyDeviceToHost),
              "cudaMemcpy(d_x -> host)");
    printf(" \n %d \n",x);

    checkCuda(cudaUnbindTexture(tex), "cudaUnbindTexture");
    //Don't forget to free the allocated memory
    checkCuda(cudaFree(d_textureTable), "cudaFree(d_textureTable)");
    checkCuda(cudaFree(d_x), "cudaFree(d_x)");
    return 0;
}