OpenCL:如何在为输入和输出创建缓冲区时指定数组的大小

OpenCL:How to specify the size of arrays when create buffers for the input and output

我写了一个编译成功的内核代码。现在我开始编写主机代码。但是在主机中,当尝试创建内存对象(为输入和输出创建缓冲区)时,我对我的输入是什么以及如何指定输入和输出数组的大小感到困惑。我尝试以下主机代码。

// Asker's first attempt, kept as posted. `sizeof(int) * [4344][20]` is not a
// valid C expression: the bracketed dimensions cannot be used as an operand,
// so the byte count has to be written as an ordinary arithmetic expression.
cl_mem dev_X_train = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(int) * [4344][20], NULL, NULL);// size
cl_mem dev_Y_train = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(int) * [4344], NULL, NULL);// size 

内核代码:

    /*
     * Weighted Euclidean distance between two 20-component integer points.
     *
     * Reads elements 0..19 through each pointer, squares the per-component
     * difference, scales it by the matching entry of the fixed weight table
     * and returns the square root of the accumulated total.
     */
    inline float distance(__global int* restrict array_point_A, __global int* restrict array_point_B) {
    /* Per-component weights, one entry for each of the 20 components. */
    float weights[20] = {
        0.0847282, 0.0408621, 0.105036, 0.0619821, 0.0595455,
        0.0416739, 0.0181147, 0.00592921, 0.040049, 0.0766054,
        0.0441091, 0.0376111, 0.0124285, 0.0733558, 0.0587338,
        0.0303001, 0.0579207, 0.0449221, 0.0530462, 0.0530462
    };
    float total = 0.0;
    int idx = 0;

    while (idx < 20) {
        /* The difference is taken in integer arithmetic, then widened to float. */
        float delta = array_point_A[idx] - array_point_B[idx];
        float term = weights[idx] * (delta * delta);
        total += term;
        ++idx;
    }

    return sqrt(total);
}
/*
 * Kernel entry point `classifier`.
 *
 * The visible part fills two function-scope arrays of 4344 entries:
 * array_dist[i] receives distance(X_train, data_point) and index_arr[i]
 * receives i. The middle of the body is elided in this listing ("....."),
 * so how min_index is computed is not shown here.
 *
 * NOTE(review): the distance() call passes the same two pointers on every
 * iteration, so every array_dist entry holds the same value; presumably row i
 * of X_train (e.g. X_train + i * 20) was intended -- confirm.
 * NOTE(review): Y_train and k are not referenced in the visible lines, and
 * class_label is a local that is not stored anywhere visible -- presumably
 * the elided part or a later revision uses them; verify.
 */
__kernel void classifier(__global int * restrict X_train,__global int * restrict Y_train,__global int * restrict data_point, int k)
{
     
    /* Distance value per training row, and the row number it belongs to. */
    float array_dist[4344] = {};
    int index_arr[4344] = {};
    for (int i = 0; i < 4344; ++i)
    { 
       /* NOTE(review): i is not used to select a row of X_train here. */
       array_dist[i] = distance(X_train,data_point);
       index_arr[i] = i;
        }
     .......................
..............................
 /* min_index comes from the elided section above. */
 int  class_label = min_index; } 
/*
 * Host-side setup for the three kernel inputs: declare the host arrays,
 * create one read-only device buffer per array, bind the buffers (and k) to
 * kernel arguments 0..3, then copy the host data into the device buffers.
 *
 * Every byte count is taken with sizeof on the host array itself, so the
 * buffer size, the transfer size and the array declaration cannot drift apart.
 */
enum { N_TRAIN = 4344, N_FEATURES = 20, N_DATA_POINT = 10000 };

int X_train[N_TRAIN][N_FEATURES];   /* fill with training data before the writes below */
int Y_train[N_TRAIN];
int data_point[N_DATA_POINT];       /* 10000 is a placeholder size; adjust to the real input */
int k;                              /* must hold its final value before clSetKernelArg copies it */

/* The size argument is in bytes. errcode_ret is left NULL here; pass a
 * cl_int * instead to find out why a buffer could not be created. */
cl_mem dev_X_train = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof X_train, NULL, NULL);
cl_mem dev_Y_train = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof Y_train, NULL, NULL);
cl_mem dev_data_point = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof data_point, NULL, NULL);

/* Argument indices follow the order of the kernel's parameter list. */
clSetKernelArg(kernel, 0, sizeof(cl_mem), &dev_X_train);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &dev_Y_train);
clSetKernelArg(kernel, 2, sizeof(cl_mem), &dev_data_point);
clSetKernelArg(kernel, 3, sizeof(int), &k);


/* Blocking writes (CL_TRUE): each call returns only after the host array has
 * been copied, so the arrays may be reused immediately afterwards. */
clEnqueueWriteBuffer(cmdqId, dev_X_train, CL_TRUE, 0, sizeof X_train, X_train, 0, NULL, NULL);
clEnqueueWriteBuffer(cmdqId, dev_Y_train, CL_TRUE, 0, sizeof Y_train, Y_train, 0, NULL, NULL);
clEnqueueWriteBuffer(cmdqId, dev_data_point, CL_TRUE, 0, sizeof data_point, data_point, 0, NULL, NULL);

您需要先在主机端初始化数据,然后通过带主机指针的 clCreateBuffer 将其发送到设备,或者使用 clEnqueueWriteBuffer 将其写入设备。之后再使用 clSetKernelArg 将内存对象绑定到内核参数。

注意:我没有在你的代码中得到 data_point 的大小,所以假设一个随机值。

对于读取输出:

/* Host-side destination for the kernel results. */
int output[10000];

/* Write-only device buffer sized in bytes from the host array. Argument
 * index 4 is a fifth kernel parameter, so the kernel must declare a matching
 * `__global int *` output parameter and store its results through it. */
cl_mem dev_output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof output, NULL, NULL);
clSetKernelArg(kernel, 4, sizeof(cl_mem), &dev_output);

//clEnqueueNDRangeKernel()

/* Blocking read (CL_TRUE). The byte count comes from the int array itself so
 * it always matches the element type of both the buffer and the destination. */
clEnqueueReadBuffer(cmdqId, dev_output, CL_TRUE, 0, sizeof output, output, 0, NULL, NULL);

在您的内核中,将最后一个参数指向您的输出并向其写入数据。最后使用 clEnqueueReadBuffer() 读取。