CUDA 纹理无法返回 unsigned long long 类型的值
CUDA texture can't return value type of unsigned long long
我有代码:
// Question snippet: int4 is defined here as a macro that expands to
// unsigned long long int.
// NOTE(review): int4 is also the name of a CUDA built-in vector type, so this
// macro presumably hides it — confirm this is intended.
#define int4 unsigned long long int
// Fetch the texel at (col, row) from STexture into a variable of that type.
int4 mer_thread = tex2D(STexture, col, row);
// Print the row, the column and a second fetch of the same texel.
// NOTE(review): "%d" is used for the fetched value; with a 64-bit int4 the
// format specifier would presumably need to be 64-bit as well — confirm.
printf("\nTexture[%d][%d] = %d", row, col, tex2D(STexture, col, row));
编译时报错:error : no instance of overloaded function "tex2D" matches the argument list
但如果改为 #define int4 unsigned long int,代码就能正常工作。
我的代码创建纹理:
// Builds the texture used by the question: allocates a CUDA array, copies the
// host matrix _S into it, configures STexture and binds it to the array.
//   _S : host matrix with nmax rows and NMAX columns of int4 elements
// NOTE(review): none of the CUDA return codes are checked in this function.
void Creat_TexttureS(int4 _S[nmax][NMAX])
{
cudaArray* carray;
cudaChannelFormatDesc channel;
// Channel format is derived from the element type int4.
channel = cudaCreateChannelDesc<int4>();
// The array is allocated NMAX wide and nmax high.
cudaMallocArray(&carray, &channel, NMAX, nmax);
// Copy the whole matrix (NMAX*nmax elements) to the array, starting at (0, 0).
cudaMemcpyToArray(carray, 0, 0, _S, sizeof(int4)*NMAX*nmax, cudaMemcpyHostToDevice);
// Point filtering; wrap addressing in dimension 0, clamp in dimension 1.
STexture.filterMode = cudaFilterModePoint;
STexture.addressMode[0] = cudaAddressModeWrap;
STexture.addressMode[1] = cudaAddressModeClamp;
// Attach the texture reference to the freshly filled array.
cudaBindTextureToArray(STexture, carray);
}
感谢您的帮助!!
下面是一个可运行的示例,演示如何将 long long int 类型的数据存储到元素类型为 int2 的二维纹理中,然后通过 tex2D() 取回,并将其重新解释为 long long int。
#include <stdlib.h>
#include <stdio.h>
// Macro to catch CUDA errors in CUDA runtime calls.
// Evaluates `call` exactly once. If the result is not cudaSuccess, prints the
// file, line and cudaGetErrorString() text to stderr and terminates the
// process with EXIT_FAILURE. The do { } while (0) wrapper makes the macro
// behave as a single statement.
#define CUDA_SAFE_CALL(call)                                          \
do {                                                                  \
    /* Parenthesized argument and a macro-private variable name, so */\
    /* a caller expression mentioning `err` is not captured here.   */\
    cudaError_t cuda_safe_call_err_ = (call);                         \
    if (cudaSuccess != cuda_safe_call_err_) {                         \
        fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",\
                 __FILE__, __LINE__,                                  \
                 cudaGetErrorString(cuda_safe_call_err_) );           \
        exit(EXIT_FAILURE);                                           \
    }                                                                 \
} while (0)
// Macro to catch CUDA errors in kernel launches.
// Intended for use right after a <<<...>>> launch. It first reads
// cudaGetLastError(), then blocks in cudaDeviceSynchronize() and checks its
// result. Either failure prints file, line and the CUDA error string to
// stderr and exits with EXIT_FAILURE.
#define CHECK_LAUNCH_ERROR()                                          \
do {                                                                  \
    /* Check synchronous errors, i.e. pre-launch */                   \
    cudaError_t err = cudaGetLastError();                             \
    if (cudaSuccess != err) {                                         \
        fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",\
                 __FILE__, __LINE__, cudaGetErrorString(err) );       \
        exit(EXIT_FAILURE);                                           \
    }                                                                 \
    /* Check asynchronous errors, i.e. kernel failed (ULF) */         \
    /* cudaDeviceSynchronize() supersedes the deprecated           */ \
    /* cudaThreadSynchronize().                                     */ \
    err = cudaDeviceSynchronize();                                    \
    if (cudaSuccess != err) {                                         \
        fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",\
                 __FILE__, __LINE__, cudaGetErrorString( err) );      \
        exit(EXIT_FAILURE);                                           \
    }                                                                 \
} while (0)
// Reinterprets the two 32-bit components of an int2 as one 64-bit signed
// integer: a.x supplies the low 32 bits and a.y the high 32 bits.
__forceinline__ __device__ long long int int2_as_longlong (int2 a)
{
    // Go through unsigned int so each half is zero-extended; otherwise the
    // sign bit of a.x would spill into the upper word.
    unsigned long long int low_word  = (unsigned int)a.x;
    unsigned long long int high_word = (unsigned int)a.y;
    return (long long int)((high_word << 32) | low_word);
}
// 2D texture reference with int2 texels, read back unconverted
// (cudaReadModeElementType).
texture<int2, 2, cudaReadModeElementType> tex;

// Prints the texture bound to `tex` as a table of long long values: m text
// lines with n entries each. Every thread that executes the kernel prints the
// whole table.
//   m : number of rows to read (texture y coordinate range)
//   n : number of columns to read (texture x coordinate range)
__global__ void kernel (int m, int n)
{
    for (int y = 0; y < m; ++y) {
        for (int x = 0; x < n; ++x) {
            // Fetch one int2 texel and print it reinterpreted as 64 bits.
            const int2 texel = tex2D (tex, x, y);
            printf ("% 11lld ", int2_as_longlong (texel));
        }
        printf ("\n");
    }
}
// Demo driver: uploads a 4x3 table of 64-bit integers into pitched device
// memory, binds that memory to the 2D int2 texture `tex`, and launches
// `kernel` to print the contents through texture fetches.
// Returns EXIT_SUCCESS on success, or EXIT_FAILURE if the texture binding
// reports a nonzero offset. CUDA API failures terminate the process inside
// CUDA_SAFE_CALL / CHECK_LAUNCH_ERROR.
int main (void)
{
    int m = 4; // height = #rows
    int n = 3; // width = #columns
    size_t pitch, tex_ofs;
    unsigned long long int arr[4][3]=
        {{11111111LL, 11112222LL, 11113333LL},
         {22221111LL, 22222222LL, 22223333LL},
         {33331111LL, 33332222LL, 33333333LL},
         {44441111LL, 44442222LL, 44443333LL}};
    int2 *arr_d = 0;

    // Pitched allocation: m rows, each wide enough for n int2 texels.
    CUDA_SAFE_CALL(cudaMallocPitch((void**)&arr_d,&pitch,n*sizeof(*arr_d),m));
    // Host rows are tightly packed (n 64-bit values); device rows are `pitch`
    // bytes apart.
    CUDA_SAFE_CALL(cudaMemcpy2D(arr_d, pitch, arr, n*sizeof(arr[0][0]),
                                n*sizeof(arr[0][0]),m,cudaMemcpyHostToDevice));
    CUDA_SAFE_CALL (cudaBindTexture2D (&tex_ofs, &tex, arr_d, &tex.channelDesc,
                                       n, m, pitch));
    // The kernel fetches at unadjusted (col, row) coordinates, so a nonzero
    // binding offset is treated as a failure.
    if (tex_ofs !=0) {
        printf ("tex_ofs = %zu\n", tex_ofs);
        // Release the binding and the device allocation on this exit path too.
        CUDA_SAFE_CALL (cudaUnbindTexture (tex));
        CUDA_SAFE_CALL (cudaFree (arr_d));
        return EXIT_FAILURE;
    }

    printf ("printing texture content\n");
    // A single thread prints the whole table.
    kernel<<<1,1>>>(m, n);
    CHECK_LAUNCH_ERROR();

    CUDA_SAFE_CALL (cudaUnbindTexture (tex));
    CUDA_SAFE_CALL (cudaFree (arr_d));
    return EXIT_SUCCESS;
}
我有代码:
// Question snippet: int4 is defined here as a macro that expands to
// unsigned long long int.
// NOTE(review): int4 is also the name of a CUDA built-in vector type, so this
// macro presumably hides it — confirm this is intended.
#define int4 unsigned long long int
// Fetch the texel at (col, row) from STexture into a variable of that type.
int4 mer_thread = tex2D(STexture, col, row);
// Print the row, the column and a second fetch of the same texel.
// NOTE(review): "%d" is used for the fetched value; with a 64-bit int4 the
// format specifier would presumably need to be 64-bit as well — confirm.
printf("\nTexture[%d][%d] = %d", row, col, tex2D(STexture, col, row));
编译时报错:error : no instance of overloaded function "tex2D" matches the argument list
但如果改为 #define int4 unsigned long int,代码就能正常工作。
我的代码创建纹理:
// Builds the texture used by the question: allocates a CUDA array, copies the
// host matrix _S into it, configures STexture and binds it to the array.
//   _S : host matrix with nmax rows and NMAX columns of int4 elements
// NOTE(review): none of the CUDA return codes are checked in this function.
void Creat_TexttureS(int4 _S[nmax][NMAX])
{
cudaArray* carray;
cudaChannelFormatDesc channel;
// Channel format is derived from the element type int4.
channel = cudaCreateChannelDesc<int4>();
// The array is allocated NMAX wide and nmax high.
cudaMallocArray(&carray, &channel, NMAX, nmax);
// Copy the whole matrix (NMAX*nmax elements) to the array, starting at (0, 0).
cudaMemcpyToArray(carray, 0, 0, _S, sizeof(int4)*NMAX*nmax, cudaMemcpyHostToDevice);
// Point filtering; wrap addressing in dimension 0, clamp in dimension 1.
STexture.filterMode = cudaFilterModePoint;
STexture.addressMode[0] = cudaAddressModeWrap;
STexture.addressMode[1] = cudaAddressModeClamp;
// Attach the texture reference to the freshly filled array.
cudaBindTextureToArray(STexture, carray);
}
感谢您的帮助!!
下面是一个可运行的示例,演示如何将 long long int 类型的数据存储到元素类型为 int2 的二维纹理中,然后通过 tex2D() 取回,并将其重新解释为 long long int。
#include <stdlib.h>
#include <stdio.h>
// Macro to catch CUDA errors in CUDA runtime calls.
// Evaluates `call` exactly once. If the result is not cudaSuccess, prints the
// file, line and cudaGetErrorString() text to stderr and terminates the
// process with EXIT_FAILURE. The do { } while (0) wrapper makes the macro
// behave as a single statement.
#define CUDA_SAFE_CALL(call)                                          \
do {                                                                  \
    /* Parenthesized argument and a macro-private variable name, so */\
    /* a caller expression mentioning `err` is not captured here.   */\
    cudaError_t cuda_safe_call_err_ = (call);                         \
    if (cudaSuccess != cuda_safe_call_err_) {                         \
        fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",\
                 __FILE__, __LINE__,                                  \
                 cudaGetErrorString(cuda_safe_call_err_) );           \
        exit(EXIT_FAILURE);                                           \
    }                                                                 \
} while (0)
// Macro to catch CUDA errors in kernel launches.
// Intended for use right after a <<<...>>> launch. It first reads
// cudaGetLastError(), then blocks in cudaDeviceSynchronize() and checks its
// result. Either failure prints file, line and the CUDA error string to
// stderr and exits with EXIT_FAILURE.
#define CHECK_LAUNCH_ERROR()                                          \
do {                                                                  \
    /* Check synchronous errors, i.e. pre-launch */                   \
    cudaError_t err = cudaGetLastError();                             \
    if (cudaSuccess != err) {                                         \
        fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",\
                 __FILE__, __LINE__, cudaGetErrorString(err) );       \
        exit(EXIT_FAILURE);                                           \
    }                                                                 \
    /* Check asynchronous errors, i.e. kernel failed (ULF) */         \
    /* cudaDeviceSynchronize() supersedes the deprecated           */ \
    /* cudaThreadSynchronize().                                     */ \
    err = cudaDeviceSynchronize();                                    \
    if (cudaSuccess != err) {                                         \
        fprintf (stderr, "Cuda error in file '%s' in line %i : %s.\n",\
                 __FILE__, __LINE__, cudaGetErrorString( err) );      \
        exit(EXIT_FAILURE);                                           \
    }                                                                 \
} while (0)
// Reinterprets the two 32-bit components of an int2 as one 64-bit signed
// integer: a.x supplies the low 32 bits and a.y the high 32 bits.
__forceinline__ __device__ long long int int2_as_longlong (int2 a)
{
    // Go through unsigned int so each half is zero-extended; otherwise the
    // sign bit of a.x would spill into the upper word.
    unsigned long long int low_word  = (unsigned int)a.x;
    unsigned long long int high_word = (unsigned int)a.y;
    return (long long int)((high_word << 32) | low_word);
}
// 2D texture reference with int2 texels, read back unconverted
// (cudaReadModeElementType).
texture<int2, 2, cudaReadModeElementType> tex;

// Prints the texture bound to `tex` as a table of long long values: m text
// lines with n entries each. Every thread that executes the kernel prints the
// whole table.
//   m : number of rows to read (texture y coordinate range)
//   n : number of columns to read (texture x coordinate range)
__global__ void kernel (int m, int n)
{
    for (int y = 0; y < m; ++y) {
        for (int x = 0; x < n; ++x) {
            // Fetch one int2 texel and print it reinterpreted as 64 bits.
            const int2 texel = tex2D (tex, x, y);
            printf ("% 11lld ", int2_as_longlong (texel));
        }
        printf ("\n");
    }
}
// Demo driver: uploads a 4x3 table of 64-bit integers into pitched device
// memory, binds that memory to the 2D int2 texture `tex`, and launches
// `kernel` to print the contents through texture fetches.
// Returns EXIT_SUCCESS on success, or EXIT_FAILURE if the texture binding
// reports a nonzero offset. CUDA API failures terminate the process inside
// CUDA_SAFE_CALL / CHECK_LAUNCH_ERROR.
int main (void)
{
    int m = 4; // height = #rows
    int n = 3; // width = #columns
    size_t pitch, tex_ofs;
    unsigned long long int arr[4][3]=
        {{11111111LL, 11112222LL, 11113333LL},
         {22221111LL, 22222222LL, 22223333LL},
         {33331111LL, 33332222LL, 33333333LL},
         {44441111LL, 44442222LL, 44443333LL}};
    int2 *arr_d = 0;

    // Pitched allocation: m rows, each wide enough for n int2 texels.
    CUDA_SAFE_CALL(cudaMallocPitch((void**)&arr_d,&pitch,n*sizeof(*arr_d),m));
    // Host rows are tightly packed (n 64-bit values); device rows are `pitch`
    // bytes apart.
    CUDA_SAFE_CALL(cudaMemcpy2D(arr_d, pitch, arr, n*sizeof(arr[0][0]),
                                n*sizeof(arr[0][0]),m,cudaMemcpyHostToDevice));
    CUDA_SAFE_CALL (cudaBindTexture2D (&tex_ofs, &tex, arr_d, &tex.channelDesc,
                                       n, m, pitch));
    // The kernel fetches at unadjusted (col, row) coordinates, so a nonzero
    // binding offset is treated as a failure.
    if (tex_ofs !=0) {
        printf ("tex_ofs = %zu\n", tex_ofs);
        // Release the binding and the device allocation on this exit path too.
        CUDA_SAFE_CALL (cudaUnbindTexture (tex));
        CUDA_SAFE_CALL (cudaFree (arr_d));
        return EXIT_FAILURE;
    }

    printf ("printing texture content\n");
    // A single thread prints the whole table.
    kernel<<<1,1>>>(m, n);
    CHECK_LAUNCH_ERROR();

    CUDA_SAFE_CALL (cudaUnbindTexture (tex));
    CUDA_SAFE_CALL (cudaFree (arr_d));
    return EXIT_SUCCESS;
}