当从内核调用函数时,OpenCL 程序不会构建
OpenCL program doesn't build when a function is called from a kernel
这个问题源自另一个问题。
我有一个调用普通函数的内核。当我构建并运行我的代码时,我得到以下输出:
Number of devices: 2
building program failed
-----COULD NOT CREATE KERNEL!!---
我的 .cl 文件中有问题的部分如下:
#define IDCT_INT_MIN (- IDCT_INT_MAX - 1)
#define IDCT_INT_MAX 2147483647
....
....
....
....
....
....
/* Left-shifts x by n bits. */
#define SCALE(x,n) ((x) << (n))
/*
 * Butterfly step: stores SUB(a,b) into x and ADD(a,b) into y.
 * Expands to a brace-delimited block; a and b are each evaluated twice.
 */
#define but(a,b,x,y) { x = SUB(a,b); y = ADD(a,b); }
/*
 * Shifts x right by n bits with rounding: adds 2^(n-1) first, and
 * additionally subtracts 1 when x is negative.
 */
static int DESCALE (int x, int n)
{
    const int half = 1 << (n - 1);
    int rounded = x + half;

    if (x < 0) {
        rounded -= 1;
    }
    return rounded >> n;
}
/* Returns the sum x + y. */
static int ADD(int x, int y)
{
    return x + y;
}
/* Returns the difference x - y. */
static int SUB(int x, int y)
{
    return x - y;
}
/*
 * Multiplies x by the constant c, adds 2^(C_BITS-1) for rounding, and
 * shifts the result right by C_BITS.
 */
static int CMUL(int c, int x)
{
    const int half = 1 << (C_BITS - 1);
    int product = c * x;

    return (product + half) >> C_BITS;
}
/*
 * Combines x and y using two coefficients from row f of a local 2x8 table:
 *   *rx = COS[f][k] * x - COS[f][8-k] * y
 *   *ry = COS[f][8-k] * x + COS[f][k] * y
 * with every product going through CMUL. Since each row has 8 entries,
 * index 8 - k stays inside the row only for k >= 1.
 */
static void rot(int f, int k, int x, int y, int *rx, int *ry) {
    int COS[2][8] = {
        {c0_1, c1_1, c2_1, c3_1, c4_1, c5_1, c6_1, c7_1},
        {c0_s2, c1_s2, c2_s2, c3_s2, c4_s2, c5_s2, c6_s2, c7_s2}
    };
    int coef_k = COS[f][k];
    int coef_8_minus_k = COS[f][8 - k];

    *rx = SUB(CMUL(coef_k, x), CMUL(coef_8_minus_k, y));
    *ry = ADD(CMUL(coef_8_minus_k, x), CMUL(coef_k, y));
}
void idct_1D(__private int *Y);
/*
 * Kernel: reads 64 ints from input, transforms them in two passes (one
 * call per k, then one call per l) and stores clamped results through X().
 *
 * Y(k, l) and X(k, l) are not defined in this excerpt; presumably they are
 * indexing macros from the elided part of the file (X likely targeting
 * output) -- confirm.
 *
 * NOTE(review): the body calls idct_1d (lower-case d), but the function
 * declared and defined in this file is idct_1D. OpenCL C identifiers are
 * case-sensitive, so these are two different names.
 */
__kernel void IDCT(__global int *input, __global uchar *output)
{
int Y[64];
int k, l;
int Yc[8];
/* First pass: scale 8 inputs for each k by S_BITS, then transform them. */
for (k = 0; k < 8; k++)
{
for (l = 0; l < 8; l++) Y(k, l) = SCALE(input[(k << 3) + l], S_BITS);
idct_1d(&Y(k, 0));
}
/* Second pass: for each l, gather the 8 values over k into Yc and transform. */
for (l = 0; l < 8; l++)
{
for (k = 0; k < 8; k++)
{
Yc[k] = Y(k, l);
}
idct_1d(Yc);
for (k = 0; k < 8; k++)
{
/* Descale by S_BITS + 3, add 128, then clamp to the range 0..255. */
int r = 128 + DESCALE(Yc[k], S_BITS + 3);
r = r > 0 ? (r < 255 ? r : 255) : 0;
X(k, l) = r;
}
}
}
/*
 * Transforms the 8 ints at Y in place through three intermediate stages
 * (z1, z2, z3) built from but() butterflies, rot() rotations and CMUL
 * multiplications; the last four butterflies write the result back to Y.
 * sqrt2 is not defined in this excerpt -- presumably a fixed-point
 * constant from the elided part of the file; confirm.
 */
void idct_1D(__private int *Y)
{
int z1[8], z2[8], z3[8];
/* Stage 1: all eight inputs are read into z1 here. */
but(Y[0], Y[4], z1[1], z1[0]);
rot(1, 6, Y[2], Y[6], &z1[2], &z1[3]);
but(Y[1], Y[7], z1[4], z1[7]);
z1[5] = CMUL(sqrt2, Y[3]);
z1[6] = CMUL(sqrt2, Y[5]);
/* Stage 2: four butterflies from z1 into z2. */
but(z1[0], z1[3], z2[3], z2[0]);
but(z1[1], z1[2], z2[2], z2[1]);
but(z1[4], z1[6], z2[6], z2[4]);
but(z1[7], z1[5], z2[5], z2[7]);
/* Stage 3: entries 0..3 are copied, entries 4..7 come from two rotations. */
z3[0] = z2[0];
z3[1] = z2[1];
z3[2] = z2[2];
z3[3] = z2[3];
rot(0, 3, z2[4], z2[7], &z3[4], &z3[7]);
rot(0, 1, z2[5], z2[6], &z3[5], &z3[6]);
/* Output: butterflies pairing z3[i] with z3[7 - i] overwrite Y. */
but(z3[0], z3[7], Y[7], Y[0]);
but(z3[1], z3[6], Y[6], Y[1]);
but(z3[2], z3[5], Y[5], Y[2]);
but(z3[3], z3[4], Y[4], Y[3]);
}
错误是由我从内核 IDCT 调用的函数 idct_1D 引起的。
如何解决内核函数的错误?
有没有办法从内核调用函数?
编辑:
按照pmdj的回答,我按照以下方式编写了构建语句:
/* Build the program for one device; on failure, print the compiler's build log. */
ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
if (ret != CL_SUCCESS)
{
    size_t log_size = 0;
    char buffer[2048] = "";

    printf("building program failed\n");
    /* First query only the size of the log (no destination buffer). */
    if (clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size) != CL_SUCCESS)
    {
        printf("could not query the build log size\n");
    }
    else if (log_size > sizeof(buffer))
    {
        /*
         * A destination smaller than the log makes the fetch fail without
         * filling the buffer, so report the required size instead of
         * printing uninitialised memory.
         */
        printf("build log needs %lu bytes but the buffer holds only %lu\n",
               (unsigned long)log_size, (unsigned long)sizeof(buffer));
    }
    else if (clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, NULL) == CL_SUCCESS)
    {
        printf("%s\n", buffer);
    }
    else
    {
        printf("could not read the build log\n");
    }
}
我在构建并运行我的代码后得到了以下输出:
Number of devices: 2
building program failed
ptxas application ptx input, line 71; error : Call has wrong number of parameters
ptxas application ptx input, line 112; error : Call has wrong number of parameters
ptxas application ptx input, line 153; error : Call has wrong number of parameters
ptxas application ptx input, line 194; error : Call has wrong number of parameters
ptxas application ptx input, line 235; error : Call has wrong number of parameters
ptxas application ptx input, line 276; error : Call has wrong number of parameters
ptxas application ptx input, line 317; error : Call has wrong number of parameters
ptxas application ptx input, line 358; error : Call has wrong number of parameters
ptxas application ptx input, line 392; error : Call has wrong number of parameters
ptxas application ptx input, line 520; error : Call has wrong number of parameters
ptxas application ptx input, line 648; error : Call has wrong number of parameters
ptxas application ptx input, line 776; error : Call has wrong number of parameters
ptxas application ptx input, line 904; error : Call has wrong number of parameters
ptxas application ptx input, line 1032; error : Call has wrong number of parameters
ptxas application ptx input, line 1160; error : Call has wrong number of parameters
ptxas application ptx input, line 1288; error : Call has wrong number of parameters
ptxas fatal : Ptx assembly aborted due to errors
-----COULD NOT CREATE KERNEL!!---
我查看了这个 GitHub 链接,并从我的 .cl 文件中删除了所有注释和 printf 语句。尽管如此,在构建并运行代码后,我仍然遇到相同的错误。
编辑:
该代码反映了我在遵循 pmdj 的建议后尝试在代码中实现的更改。现在我只收到语法错误。
我现在得到的输出如下:
Number of devices: 2
building program failed
<kernel>:98:70: error: invalid address space for pointee of pointer argument to __kernel function
__kernel void IDCT(__global int *input, __global uchar *output, int *Yc, int *Yin)
^
<kernel>:98:79: error: invalid address space for pointee of pointer argument to __kernel function
__kernel void IDCT(__global int *input, __global uchar *output, int *Yc, int *Yin)
^
<kernel>:105:30: error: called object type '__attribute__((address_space(16776963))) int *' is not a function or function pointer
for (l = 0; l < 8; l++) Yin(k, l) = SCALE(input[(k << 3) + l], S_BITS);
~~~^
<kernel>:106:3: warning: implicit declaration of function 'idct_1d' is invalid in C99
idct_1d(&Yin(k, 0));
^
<kernel>:106:15: error: called object type '__attribute__((address_space(16776963))) int *' is not a function or function pointer
idct_1d(&Yin(k, 0));
~~~^
<kernel>:114:15: error: called object type '__attribute__((address_space(16776963))) int *' is not a function or function pointer
Yc[k] = Yin(k, l);
~~~^
-----COULD NOT CREATE KERNEL!!---
编辑:
我将 __private 放在函数参数之前,但我再次收到 ptx 错误。
编辑:
代码现在可以工作了。原来是我用错误的名称调用了函数(idct_1d 而不是 idct_1D)。
我的函数标题现在如下:
void idct_1D(int *Y);
__kernel void IDCT(__global int *input, __global uchar *output);
在 clBuildProgram() 失败后,您可以使用 clGetProgramBuildInfo() 函数查看详细的编译器输出。像这样:
/* Fetch the build log for device_id into a fixed 2048-byte buffer. */
size_t len; /* passed as the last argument, the size-returned output of the query */
char buffer[2048];
clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
https://www.khronos.org/registry/OpenCL/sdk/1.0/docs/man/xhtml/clGetProgramBuildInfo.html
顺便说一句,问题很可能是你的 idct_1D 函数接受了一个指向 __global 内存(默认)的指针,而你正试图把 __private 内存中的数组传给它。在 OpenCL 中,始终用正确的内存空间(address space)标记您的指针。
这个问题源自另一个问题。
我有一个调用普通函数的内核。当我构建并运行我的代码时,我得到以下输出:
Number of devices: 2
building program failed
-----COULD NOT CREATE KERNEL!!---
我的 .cl 文件中有问题的部分如下:
#define IDCT_INT_MIN (- IDCT_INT_MAX - 1)
#define IDCT_INT_MAX 2147483647
....
....
....
....
....
....
/* Left-shifts x by n bits. */
#define SCALE(x,n) ((x) << (n))
/*
 * Butterfly step: stores SUB(a,b) into x and ADD(a,b) into y.
 * Expands to a brace-delimited block; a and b are each evaluated twice.
 */
#define but(a,b,x,y) { x = SUB(a,b); y = ADD(a,b); }
/*
 * Shifts x right by n bits with rounding: adds 2^(n-1) first, and
 * additionally subtracts 1 when x is negative.
 */
static int DESCALE (int x, int n)
{
    const int half = 1 << (n - 1);
    int rounded = x + half;

    if (x < 0) {
        rounded -= 1;
    }
    return rounded >> n;
}
/* Returns the sum x + y. */
static int ADD(int x, int y)
{
    return x + y;
}
/* Returns the difference x - y. */
static int SUB(int x, int y)
{
    return x - y;
}
/*
 * Multiplies x by the constant c, adds 2^(C_BITS-1) for rounding, and
 * shifts the result right by C_BITS.
 */
static int CMUL(int c, int x)
{
    const int half = 1 << (C_BITS - 1);
    int product = c * x;

    return (product + half) >> C_BITS;
}
/*
 * Combines x and y using two coefficients from row f of a local 2x8 table:
 *   *rx = COS[f][k] * x - COS[f][8-k] * y
 *   *ry = COS[f][8-k] * x + COS[f][k] * y
 * with every product going through CMUL. Since each row has 8 entries,
 * index 8 - k stays inside the row only for k >= 1.
 */
static void rot(int f, int k, int x, int y, int *rx, int *ry) {
    int COS[2][8] = {
        {c0_1, c1_1, c2_1, c3_1, c4_1, c5_1, c6_1, c7_1},
        {c0_s2, c1_s2, c2_s2, c3_s2, c4_s2, c5_s2, c6_s2, c7_s2}
    };
    int coef_k = COS[f][k];
    int coef_8_minus_k = COS[f][8 - k];

    *rx = SUB(CMUL(coef_k, x), CMUL(coef_8_minus_k, y));
    *ry = ADD(CMUL(coef_8_minus_k, x), CMUL(coef_k, y));
}
void idct_1D(__private int *Y);
/*
 * Kernel: reads 64 ints from input, transforms them in two passes (one
 * call per k, then one call per l) and stores clamped results through X().
 *
 * Y(k, l) and X(k, l) are not defined in this excerpt; presumably they are
 * indexing macros from the elided part of the file (X likely targeting
 * output) -- confirm.
 *
 * NOTE(review): the body calls idct_1d (lower-case d), but the function
 * declared and defined in this file is idct_1D. OpenCL C identifiers are
 * case-sensitive, so these are two different names.
 */
__kernel void IDCT(__global int *input, __global uchar *output)
{
int Y[64];
int k, l;
int Yc[8];
/* First pass: scale 8 inputs for each k by S_BITS, then transform them. */
for (k = 0; k < 8; k++)
{
for (l = 0; l < 8; l++) Y(k, l) = SCALE(input[(k << 3) + l], S_BITS);
idct_1d(&Y(k, 0));
}
/* Second pass: for each l, gather the 8 values over k into Yc and transform. */
for (l = 0; l < 8; l++)
{
for (k = 0; k < 8; k++)
{
Yc[k] = Y(k, l);
}
idct_1d(Yc);
for (k = 0; k < 8; k++)
{
/* Descale by S_BITS + 3, add 128, then clamp to the range 0..255. */
int r = 128 + DESCALE(Yc[k], S_BITS + 3);
r = r > 0 ? (r < 255 ? r : 255) : 0;
X(k, l) = r;
}
}
}
/*
 * Transforms the 8 ints at Y in place through three intermediate stages
 * (z1, z2, z3) built from but() butterflies, rot() rotations and CMUL
 * multiplications; the last four butterflies write the result back to Y.
 * sqrt2 is not defined in this excerpt -- presumably a fixed-point
 * constant from the elided part of the file; confirm.
 */
void idct_1D(__private int *Y)
{
int z1[8], z2[8], z3[8];
/* Stage 1: all eight inputs are read into z1 here. */
but(Y[0], Y[4], z1[1], z1[0]);
rot(1, 6, Y[2], Y[6], &z1[2], &z1[3]);
but(Y[1], Y[7], z1[4], z1[7]);
z1[5] = CMUL(sqrt2, Y[3]);
z1[6] = CMUL(sqrt2, Y[5]);
/* Stage 2: four butterflies from z1 into z2. */
but(z1[0], z1[3], z2[3], z2[0]);
but(z1[1], z1[2], z2[2], z2[1]);
but(z1[4], z1[6], z2[6], z2[4]);
but(z1[7], z1[5], z2[5], z2[7]);
/* Stage 3: entries 0..3 are copied, entries 4..7 come from two rotations. */
z3[0] = z2[0];
z3[1] = z2[1];
z3[2] = z2[2];
z3[3] = z2[3];
rot(0, 3, z2[4], z2[7], &z3[4], &z3[7]);
rot(0, 1, z2[5], z2[6], &z3[5], &z3[6]);
/* Output: butterflies pairing z3[i] with z3[7 - i] overwrite Y. */
but(z3[0], z3[7], Y[7], Y[0]);
but(z3[1], z3[6], Y[6], Y[1]);
but(z3[2], z3[5], Y[5], Y[2]);
but(z3[3], z3[4], Y[4], Y[3]);
}
错误是由我从内核 IDCT 调用的函数 idct_1D 引起的。
如何解决内核函数的错误?
有没有办法从内核调用函数?
编辑:
按照pmdj的回答,我按照以下方式编写了构建语句:
/* Build the program for one device; on failure, print the compiler's build log. */
ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
if (ret != CL_SUCCESS)
{
    size_t log_size = 0;
    char buffer[2048] = "";

    printf("building program failed\n");
    /* First query only the size of the log (no destination buffer). */
    if (clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size) != CL_SUCCESS)
    {
        printf("could not query the build log size\n");
    }
    else if (log_size > sizeof(buffer))
    {
        /*
         * A destination smaller than the log makes the fetch fail without
         * filling the buffer, so report the required size instead of
         * printing uninitialised memory.
         */
        printf("build log needs %lu bytes but the buffer holds only %lu\n",
               (unsigned long)log_size, (unsigned long)sizeof(buffer));
    }
    else if (clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, NULL) == CL_SUCCESS)
    {
        printf("%s\n", buffer);
    }
    else
    {
        printf("could not read the build log\n");
    }
}
我在构建并运行我的代码后得到了以下输出:
Number of devices: 2
building program failed
ptxas application ptx input, line 71; error : Call has wrong number of parameters
ptxas application ptx input, line 112; error : Call has wrong number of parameters
ptxas application ptx input, line 153; error : Call has wrong number of parameters
ptxas application ptx input, line 194; error : Call has wrong number of parameters
ptxas application ptx input, line 235; error : Call has wrong number of parameters
ptxas application ptx input, line 276; error : Call has wrong number of parameters
ptxas application ptx input, line 317; error : Call has wrong number of parameters
ptxas application ptx input, line 358; error : Call has wrong number of parameters
ptxas application ptx input, line 392; error : Call has wrong number of parameters
ptxas application ptx input, line 520; error : Call has wrong number of parameters
ptxas application ptx input, line 648; error : Call has wrong number of parameters
ptxas application ptx input, line 776; error : Call has wrong number of parameters
ptxas application ptx input, line 904; error : Call has wrong number of parameters
ptxas application ptx input, line 1032; error : Call has wrong number of parameters
ptxas application ptx input, line 1160; error : Call has wrong number of parameters
ptxas application ptx input, line 1288; error : Call has wrong number of parameters
ptxas fatal : Ptx assembly aborted due to errors
-----COULD NOT CREATE KERNEL!!---
我查看了这个 GitHub 链接,并从我的 .cl 文件中删除了所有注释和 printf 语句。尽管如此,在构建并运行代码后,我仍然遇到相同的错误。
编辑:
该代码反映了我在遵循 pmdj 的建议后尝试在代码中实现的更改。现在我只收到语法错误。
我现在得到的输出如下:
Number of devices: 2
building program failed
<kernel>:98:70: error: invalid address space for pointee of pointer argument to __kernel function
__kernel void IDCT(__global int *input, __global uchar *output, int *Yc, int *Yin)
^
<kernel>:98:79: error: invalid address space for pointee of pointer argument to __kernel function
__kernel void IDCT(__global int *input, __global uchar *output, int *Yc, int *Yin)
^
<kernel>:105:30: error: called object type '__attribute__((address_space(16776963))) int *' is not a function or function pointer
for (l = 0; l < 8; l++) Yin(k, l) = SCALE(input[(k << 3) + l], S_BITS);
~~~^
<kernel>:106:3: warning: implicit declaration of function 'idct_1d' is invalid in C99
idct_1d(&Yin(k, 0));
^
<kernel>:106:15: error: called object type '__attribute__((address_space(16776963))) int *' is not a function or function pointer
idct_1d(&Yin(k, 0));
~~~^
<kernel>:114:15: error: called object type '__attribute__((address_space(16776963))) int *' is not a function or function pointer
Yc[k] = Yin(k, l);
~~~^
-----COULD NOT CREATE KERNEL!!---
编辑:
我将 __private 放在函数参数之前,但我再次收到 ptx 错误。
编辑:
代码现在可以工作了。原来是我用错误的名称调用了函数(idct_1d 而不是 idct_1D)。
我的函数标题现在如下:
void idct_1D(int *Y);
__kernel void IDCT(__global int *input, __global uchar *output);
在 clBuildProgram() 失败后,您可以使用 clGetProgramBuildInfo() 函数查看详细的编译器输出。像这样:
/* Fetch the build log for device_id into a fixed 2048-byte buffer. */
size_t len; /* passed as the last argument, the size-returned output of the query */
char buffer[2048];
clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
https://www.khronos.org/registry/OpenCL/sdk/1.0/docs/man/xhtml/clGetProgramBuildInfo.html
顺便说一句,问题很可能是你的 idct_1D 函数接受了一个指向 __global 内存(默认)的指针,而你正试图把 __private 内存中的数组传给它。在 OpenCL 中,始终用正确的内存空间(address space)标记您的指针。