PyOpenCl 无效的内核参数

Question

我在我的 gpu 上写了一些代码来与 python 中的一个过滤器并行化卷积。我不断收到此错误，但不确定如何解决。我在下面发布了错误以及我的代码。非常感谢您。

我检查了一些过去对这个问题的堆栈溢出响应，但其中 none 似乎可以解决问题。所以有可能我不知道你能捕捉到什么。

  File "gpu_test_prgrm.py", line 127, in <module>
    prg.multiplymatrices(queue, conv_img[0].shape , None, 3,3,2,2,2,2 ,cl_a, cl_b, cl_c)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/pyopencl/__init__.py", line 888, in kernel_call
    return self._enqueue(self, queue, global_size, local_size, *args, **kwargs)
  File "<generated code>", line 7, in enqueue_knl_multiplymatrices
pyopencl._cl.LogicError: Kernel.set_arg failed: INVALID_VALUE - when processing arg#1 (1-based): invalid kernel argument

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)


fake_img = np.array([[1.0,2.0,3.0],[4.0,5.0,6.0],[7.0,8.0,9.0]])
fake_filters = np.array([[[1.0,1.0],[1.0,1.0]],[[2.0,2.0],[2.0,2.0]]])
conv_img = np.zeros((2,2,2))


#print(fake_img)
#print(fake_filters)
#print(fake_img[0:2,0:2])
#print(fake_img.flatten())
#print(fake_filters.flatten())
#print(fake_filters[0].flatten())
#print(conv_img[0].flatten())

mf = cl.mem_flags
cl_a = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf = fake_img.flatten())
cl_b = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf = fake_filters[0].flatten())
cl_c = cl.Buffer(ctx, mf.WRITE_ONLY, conv_img[0].flatten().nbytes)

#ROWS IN FILTER, COLS IN FILTER = frow, fcol
#ROWS IN CONV IMAGE, COLS IN CONV IMAGE = crow, ccol
#ROWS IN IMAGE, COLS IN IMAGE = irow,icol

prg = cl.Program(ctx, """
    __kernel void multiplymatrices(int irow, int icol, int frow, int fcol, int crow, int ccol, __global float * fake_img, __global float * fake_filters, __global float * conv_img)
    {
    
        int i = get_global_id(0);
        int j = get_global_id(1);
        
        printf("You entered: %d", i);
        printf("You entered: %d", j);

        conv_img[i * ccol + j ] = 0;

        for (int row=0; row < frow; row++)
        {
            for (int col=0; col < fcol; col++)
            {
        
                /*(i * col + j) = translation of conv image to reg image start*/
                /*(row * icol + col) = creating the subarray in the matrix*/
                
                conv_img[i * ccol + j] += fake_img[(row * icol + col)+(i*icol+j)] * fake_filters[row * frow + col];
            
            }
        }
    }
    """).build()


t0 = datetime.datetime.now()

print(conv_img[0].shape)

prg.multiplymatrices(queue, conv_img[0].shape , None, 3,3,2,2,2,2 ,cl_a, cl_b, cl_c)

conv_img2 = np.zeros((2,2))
cl.enqueue_copy(queue, conv_img2 , cl_c)
#print(conv_img2)

delta_t = datetime.datetime.now() - t0
print('OpenCL Multiplication: ' + str(delta_t))

Answer 1

python 中的数字是 python 对象，需要包装到 np.int32() 中以将它们作为 int 传递给内核：

prg.multiplymatrices(queue, conv_img[0].shape , None, np.int32(3),np.int32(3),np.int32(2),np.int32(2),np.int32(2),np.int32(2) ,cl_a, cl_b, cl_c)

PyOpenCl 无效的内核参数

PyOpenCl Invalid Kernel Argument

python

pyopencl