CUDA - 将纹理从 int 转换为 int4

CUDA - Convert texture from int to int4

我想问一下,如何把下面的 int 纹理 (text1) 换成 int4 纹理 (text2) 来访问同样的数据:

texture<int, 2, cudaReadModeElementType> text1;

texture<int4, 2, cudaReadModeElementType> text2;

所以我的问题是索引。例如,假设我像这样访问 text1 中的第 10 列第 5 行

int col = 10;
int row = 5;
int value = tex2D(text1, col, row);

但如果我这样做,显然我访问的不是相同的数据:

int col = 10;
int row = 5;
int4 value = tex2D(text2, col, row);

所以,我尝试把 col 和 row 都除以 4(col / 4 和 row / 4),但也没有成功。我认为应该使用 DIV(整除)和 MOD(取模),但我不知道具体该怎么用。

有谁知道如何使用4通道正确访问?非常感谢!

这两种情况之间,唯一需要修改的索引是水平 (x) 方向的索引。在 int4 的情况下,水平索引要除以 4(相对于 int 的情况),而每次读取会返回 4 个值;再用 x 对 4 取模,即可从这 4 个值中选出所需的分量。这是一个完整的例子:

$ cat t1918.cu
#include <helper_cuda.h>
#include <cstdio>
// Number of rows in the 2D texture (same for both element types).
#define HEIGHT  7680
// WIDTH is the number of texels per row; `it` is the texel element type.
// Build with -DUSE_INT4 to store four ints per texel instead of one.
#ifndef USE_INT4
#define WIDTH   7245
typedef int it;
#else
// 1812 int4 texels per row, i.e. 1812*4 = 7248 ints per row.
#define WIDTH   1812
typedef int4 it;
#endif

// CUDA array allocated in p() that backs the texture.
cudaArray * Array_Device;
// Legacy (deprecated) 2D texture reference sampled by kernel k via tex2D.
texture<it, 2,cudaReadModeElementType> Image;

// Verification kernel: walks every int stored in the texture `Image` and checks
// that it equals row*10000 + column, where `column` counts individual ints
// (not texels). Prints the first mismatch and stops.
//
// Launched as k<<<1,1>>> by p(); the loops below are executed serially by each
// launched thread. The incoming values of x and y are not used.
//
// With USE_INT4 each texel holds four ints, so int column c lives in texel
// c / 4, component c % 4. Without it, texel and int columns coincide.
__global__ void k(int x, int y)
{
#ifdef USE_INT4
  const int intsPerTexel = 4;
#else
  const int intsPerTexel = 1;
#endif
  const int intsPerRow = WIDTH * intsPerTexel;

  for (int row = 0; row < HEIGHT; ++row) {
    for (int col = 0; col < intsPerRow; ++col) {
      // Split the int column into a texel column and a component index.
      const int texelCol  = col / intsPerTexel;
      const int component = col % intsPerTexel;

      it texel = tex2D(Image, texelCol, row);
      // View the texel as a flat array of ints to pick the component.
      const int actual   = reinterpret_cast<int *>(&texel)[component];
      const int expected = row*10000 + col;

      if (actual == expected)
        continue;

      printf("mismatch at %d, %d, was: %d, should be: %d\n", col, row, actual, expected);
      return;
    }
  }
}


// Builds the host test image, uploads it into a CUDA array bound to the
// texture `Image`, runs the verification kernel k, and then releases every
// resource it created (texture binding, CUDA array, host buffer).
//
// Host data layout: the int at row i, int-column c holds i*10000 + c. In the
// int4 build, texel j of a row packs int-columns 4*j .. 4*j+3.
// Any CUDA error aborts via checkCudaErrors.
void p()
{
        it *h = new it[WIDTH*HEIGHT];
        // this dataset and test-case only works for textures up to width of 9999 for int or 2499 for int4
        for (int i = 0; i < HEIGHT; i++)
          for (int j = 0; j < WIDTH; j++){
#ifndef USE_INT4
            h[i*WIDTH+j] = i*10000+j;
#else
            h[i*WIDTH+j] = {i*10000+j*4+0, i*10000+j*4+1, i*10000+j*4+2, i*10000+j*4+3};
#endif
            }
        cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<it>();
        checkCudaErrors(cudaMallocArray(&Array_Device, &channelDesc,WIDTH,HEIGHT ));
        checkCudaErrors(cudaBindTextureToArray(Image,Array_Device));
        // Source pitch and copy width are both one full row in bytes.
        checkCudaErrors(cudaMemcpy2DToArray( Array_Device,
                                        0,
                                        0,
                                        h,
                                        WIDTH*sizeof(it),
                                        WIDTH*sizeof(it),
                                        HEIGHT,
                                        cudaMemcpyHostToDevice));

        k<<<1,1>>>(0,0);
        // Launch-configuration errors are only visible through cudaGetLastError;
        // execution errors surface at the synchronize below.
        checkCudaErrors(cudaGetLastError());
        checkCudaErrors(cudaDeviceSynchronize());

        // Release everything acquired above; the original leaked all three.
        checkCudaErrors(cudaUnbindTexture(Image));
        checkCudaErrors(cudaFreeArray(Array_Device));
        Array_Device = nullptr;
        delete[] h;
}

 // Program entry point: announces the int4 build variant (if compiled with
 // -DUSE_INT4) and runs the texture round-trip check in p().
 int main()
 {
#ifdef USE_INT4
   std::fputs("int4\n", stdout);
#endif
   p();
   return 0;
 }
$ nvcc -I/usr/local/cuda/samples/common/inc -o t1918 t1918.cu
t1918.cu(40): warning: function "tex2D(texture<T, 2, cudaReadModeElementType>, float, float) [with T=it]"
/usr/local/cuda/bin/../targets/x86_64-linux/include/texture_fetch_functions.h(198): here was declared deprecated

t1918.cu: In function ‘void p()’:
t1918.cu:63:49: warning: ‘cudaError_t cudaBindTextureToArray(const texture<T, dim, readMode>&, cudaArray_const_t) [with T = int; int dim = 2; cudaTextureReadMode readMode = (cudaTextureReadMode)0; cudaError_t = cudaError; cudaArray_const_t = const cudaArray*]’ is deprecated [-Wdeprecated-declarations]
  checkCudaErrors(cudaBindTextureToArray(Image,Array_Device));
                                                 ^
/usr/local/cuda/bin/../targets/x86_64-linux/include/cuda_runtime.h:1642:53: note: declared here
 static __CUDA_DEPRECATED __inline__ __host__ cudaError_t cudaBindTextureToArray(
                                                     ^~~~~~~~~~~~~~~~~~~~~~
/usr/local/cuda/bin/../targets/x86_64-linux/include/cuda_runtime.h: In instantiation of ‘cudaError_t cudaBindTextureToArray(const texture<T, dim, readMode>&, cudaArray_const_t) [with T = int; int dim = 2; cudaTextureReadMode readMode = (cudaTextureReadMode)0; cudaError_t = cudaError; cudaArray_const_t = const cudaArray*]’:
t1918.cu:63:49:   required from here
/usr/local/cuda/bin/../targets/x86_64-linux/include/cuda_runtime.h:1650:55: warning: ‘cudaError_t cudaBindTextureToArray(const texture<T, dim, readMode>&, cudaArray_const_t, const cudaChannelFormatDesc&) [with T = int; int dim = 2; cudaTextureReadMode readMode = (cudaTextureReadMode)0; cudaError_t = cudaError; cudaArray_const_t = const cudaArray*]’ is deprecated [-Wdeprecated-declarations]
   return err == cudaSuccess ? cudaBindTextureToArray(tex, array, desc) : err;
                                 ~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~
/usr/local/cuda/bin/../targets/x86_64-linux/include/cuda_runtime.h:1603:53: note: declared here
 static __CUDA_DEPRECATED __inline__ __host__ cudaError_t cudaBindTextureToArray(
                                                     ^~~~~~~~~~~~~~~~~~~~~~
/usr/local/cuda/bin/../targets/x86_64-linux/include/cuda_runtime.h: In instantiation of ‘cudaError_t cudaBindTextureToArray(const texture<T, dim, readMode>&, cudaArray_const_t, const cudaChannelFormatDesc&) [with T = int; int dim = 2; cudaTextureReadMode readMode = (cudaTextureReadMode)0; cudaError_t = cudaError; cudaArray_const_t = const cudaArray*]’:
/usr/local/cuda/bin/../targets/x86_64-linux/include/cuda_runtime.h:1650:55:   required from ‘cudaError_t cudaBindTextureToArray(const texture<T, dim, readMode>&, cudaArray_const_t) [with T = int; int dim = 2; cudaTextureReadMode readMode = (cudaTextureReadMode)0; cudaError_t = cudaError; cudaArray_const_t = const cudaArray*]’
t1918.cu:63:49:   required from here
/usr/local/cuda/bin/../targets/x86_64-linux/include/cuda_runtime.h:1609:32: warning: ‘cudaError_t cudaBindTextureToArray(const textureReference*, cudaArray_const_t, const cudaChannelFormatDesc*)’ is deprecated [-Wdeprecated-declarations]
   return ::cudaBindTextureToArray(&tex, array, &desc);
        ~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~
/usr/local/cuda/bin/../targets/x86_64-linux/include/cuda_runtime_api.h:8662:46: note: declared here
 extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaBindTextureToArray(const struct textureReference *texref, cudaArray_const_t array, const struct cudaChannelFormatDesc *desc);
                                              ^~~~~~~~~~~~~~~~~~~~~~
$ cuda-memcheck ./t1918
========= CUDA-MEMCHECK
========= ERROR SUMMARY: 0 errors
$ nvcc -I/usr/local/cuda/samples/common/inc -o t1918 t1918.cu -DUSE_INT4
t1918.cu(40): warning: function "tex2D(texture<T, 2, cudaReadModeElementType>, float, float) [with T=it]"
/usr/local/cuda/bin/../targets/x86_64-linux/include/texture_fetch_functions.h(198): here was declared deprecated

t1918.cu: In function ‘void p()’:
t1918.cu:63:49: warning: ‘cudaError_t cudaBindTextureToArray(const texture<T, dim, readMode>&, cudaArray_const_t) [with T = int4; int dim = 2; cudaTextureReadMode readMode = (cudaTextureReadMode)0; cudaError_t = cudaError; cudaArray_const_t = const cudaArray*]’ is deprecated [-Wdeprecated-declarations]
  checkCudaErrors(cudaBindTextureToArray(Image,Array_Device));
                                                 ^
/usr/local/cuda/bin/../targets/x86_64-linux/include/cuda_runtime.h:1642:53: note: declared here
 static __CUDA_DEPRECATED __inline__ __host__ cudaError_t cudaBindTextureToArray(
                                                     ^~~~~~~~~~~~~~~~~~~~~~
/usr/local/cuda/bin/../targets/x86_64-linux/include/cuda_runtime.h: In instantiation of ‘cudaError_t cudaBindTextureToArray(const texture<T, dim, readMode>&, cudaArray_const_t) [with T = int4; int dim = 2; cudaTextureReadMode readMode = (cudaTextureReadMode)0; cudaError_t = cudaError; cudaArray_const_t = const cudaArray*]’:
t1918.cu:63:49:   required from here
/usr/local/cuda/bin/../targets/x86_64-linux/include/cuda_runtime.h:1650:55: warning: ‘cudaError_t cudaBindTextureToArray(const texture<T, dim, readMode>&, cudaArray_const_t, const cudaChannelFormatDesc&) [with T = int4; int dim = 2; cudaTextureReadMode readMode = (cudaTextureReadMode)0; cudaError_t = cudaError; cudaArray_const_t = const cudaArray*]’ is deprecated [-Wdeprecated-declarations]
   return err == cudaSuccess ? cudaBindTextureToArray(tex, array, desc) : err;
                                 ~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~
/usr/local/cuda/bin/../targets/x86_64-linux/include/cuda_runtime.h:1603:53: note: declared here
 static __CUDA_DEPRECATED __inline__ __host__ cudaError_t cudaBindTextureToArray(
                                                     ^~~~~~~~~~~~~~~~~~~~~~
/usr/local/cuda/bin/../targets/x86_64-linux/include/cuda_runtime.h: In instantiation of ‘cudaError_t cudaBindTextureToArray(const texture<T, dim, readMode>&, cudaArray_const_t, const cudaChannelFormatDesc&) [with T = int4; int dim = 2; cudaTextureReadMode readMode = (cudaTextureReadMode)0; cudaError_t = cudaError; cudaArray_const_t = const cudaArray*]’:
/usr/local/cuda/bin/../targets/x86_64-linux/include/cuda_runtime.h:1650:55:   required from ‘cudaError_t cudaBindTextureToArray(const texture<T, dim, readMode>&, cudaArray_const_t) [with T = int4; int dim = 2; cudaTextureReadMode readMode = (cudaTextureReadMode)0; cudaError_t = cudaError; cudaArray_const_t = const cudaArray*]’
t1918.cu:63:49:   required from here
/usr/local/cuda/bin/../targets/x86_64-linux/include/cuda_runtime.h:1609:32: warning: ‘cudaError_t cudaBindTextureToArray(const textureReference*, cudaArray_const_t, const cudaChannelFormatDesc*)’ is deprecated [-Wdeprecated-declarations]
   return ::cudaBindTextureToArray(&tex, array, &desc);
        ~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~
/usr/local/cuda/bin/../targets/x86_64-linux/include/cuda_runtime_api.h:8662:46: note: declared here
 extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaBindTextureToArray(const struct textureReference *texref, cudaArray_const_t array, const struct cudaChannelFormatDesc *desc);
                                              ^~~~~~~~~~~~~~~~~~~~~~
$ cuda-memcheck ./t1918
========= CUDA-MEMCHECK
int4
========= ERROR SUMMARY: 0 errors
$

int案例中存储在纹理中的数据如下所示:

    0     1     2     3     4     5     6     7 ...
10000 10001 10002 10003 10004 10005 10006 10007 ...
20000 20001 20002 20003 20004 20005 20006 20007 ...
30000 30001 30002 30003 30004 30005 30006 30007 ...
...

int4 的情况下,它看起来像这样:

{    0,    1,    2,    3} {    4,    5,    6,    7} ...
{10000,10001,10002,10003} {10004,10005,10006,10007} ...
{20000,20001,20002,20003} {20004,20005,20006,20007} ...
{30000,30001,30002,30003} {30004,30005,30006,30007} ...
...

该内核演示了在这两种情况下,如何针对同一个给定的 (x,y) 坐标取回相同的值。

请注意,这里使用的纹理引用 (texture reference) 已被弃用;对于新代码,您应该改用纹理对象 (texture objects)。