OpenCL clEnqueueNDRangeKernel 异常
OpenCL clEnqueueNDRangeKernel Exception
我运行自己的 OpenCL 程序时,clEnqueueNDRangeKernel 总是抛出异常,尽管其他所有调用都返回了 CL_SUCCESS。
******.exe 中 0x55A7BAB2 (nvopencl.dll) 的未处理异常:0xC000041D:在用户回调期间遇到未处理的异常。
这是我在 GitHub 上的代码链接:Source Code
文件“DrawingProcess.cpp”第 955 行出现异常。
我的内核位于“Renderer.h”中,请查看名为“Hierarchization”的内核。
内核
"__kernel void Hierarchization(\n",
"__global ushort2 *ScrPos,\n",
"const float4 CamPos,\n",
"const float4 CamNorV1,\n", //W represents horizontal resolution.
"const float4 CamNorV3,\n", //W represents diagonal resolution.
"__global float4 *AllVert,\n",
"__global ushort4 *DltIdx,\n",
"__global float4 *PixVect,\n",
"__global bool *Polar,\n",
"__global int *TopDltIdx)\n",
"{\n",
" float Deep[SCOUNTER1*SCOUNTER2],Distance[SCOUNTER1*SCOUNTER2];\n",
" int GID1=(int)get_global_id(0);\n",
" int GID2=(int)get_global_id(1);\n",
" int2 v,u;\n",
" int a[SCOUNTER1*SCOUNTER2],b[SCOUNTER1*SCOUNTER2];\n",
" float4 m,n,l;\n",
" float4 p1[SCOUNTER1*SCOUNTER2],p2[SCOUNTER1*SCOUNTER2];\n",
" float t[SCOUNTER1*SCOUNTER2];",
" float4 focus[SCOUNTER1*SCOUNTER2];",
" Deep[GID1+SCOUNTER1*GID2]=-1;\n",
" for(uint i=0;i<TCOUNTER;i++){\n",
" if(Polar[i]==true){\n",
" v.xy=(int)(ScrPos[DltIdx[i].y].xy-ScrPos[DltIdx[i].x].xy);\n",
" u.xy=(int)(ScrPos[DltIdx[i].z].xy-ScrPos[DltIdx[i].x].xy);\n",
// Simplify: Px=Vx*a+Ux*b,
// Py=Vy*a+Uy*b.
" b[GID1+SCOUNTER1*GID2]=(GID2*v.x-GID1*v.y)/(u.y*v.x-u.x*v.y);\n",
" a[GID1+SCOUNTER1*GID2]=(GID1-u.x*b[GID1+SCOUNTER1*GID2])/v.x;\nmem_fence(CLK_GLOBAL_MEM_FENCE);\n",
" if(a[GID1+SCOUNTER1*GID2]>=0&&b[GID1+SCOUNTER1*GID2]>=0&&a[GID1+SCOUNTER1*GID2]+b[GID1+SCOUNTER1*GID2]<=1){\n",
// Plane Equation
" m=AllVert[DltIdx[i].y]-AllVert[DltIdx[i].x];\n",
" n=AllVert[DltIdx[i].z]-AllVert[DltIdx[i].x];\n",
" l=cross(m,n);\n",
" l.w=l.x*(-m.x)+l.y*(-m.y)+l.z*(-m.z);\n",
// Two Points in Linear Equation
" p1[GID1+SCOUNTER1*GID2]=CamPos;\n",
" p2[GID1+SCOUNTER1*GID2]=p1[GID1+SCOUNTER1*GID2]+PixVect[GID1+SCOUNTER1*GID2];",
// (x-x1)(x2-x1)=(y-y1)(y2-y1)=(z-z1)(z2-z1)=t
// ax+by+cz+d=0
" t[GID1+SCOUNTER1*GID2]=(l.x*p1[GID1+SCOUNTER1*GID2].x+l.y*p1[GID1+SCOUNTER1*GID2].y+l.z*p1[GID1+SCOUNTER1*GID2].z+l.w)/(l.x*(p1[GID1+SCOUNTER1*GID2].x-p2[GID1+SCOUNTER1*GID2].x)+l.y*(p1[GID1+SCOUNTER1*GID2].y-p2[GID1+SCOUNTER1*GID2].y)+l.z*(p1[GID1+SCOUNTER1*GID2].z-p2[GID1+SCOUNTER1*GID2].z));\n",
" focus[GID1+SCOUNTER1*GID2]=(p2[GID1+SCOUNTER1*GID2]-p1[GID1+SCOUNTER1*GID2])*t[GID1+SCOUNTER1*GID2]+p1[GID1+SCOUNTER1*GID2];\n",
" p1[GID1+SCOUNTER1*GID2].w=0;\n",
" focus[GID1+SCOUNTER1*GID2].w=0;\n",
" Distance[GID1+SCOUNTER1*GID2]=fast_distance(focus[GID1+SCOUNTER1*GID2],p1[GID1+SCOUNTER1*GID2]);\nmem_fence(CLK_GLOBAL_MEM_FENCE);\n",
" if(Deep[GID1+SCOUNTER1*GID2]<0||Deep[GID1+SCOUNTER1*GID2]>Distance[GID1+SCOUNTER1*GID2]){\n"
" TopDltIdx[GID1+SCOUNTER1*GID2]=i;\nmem_fence(CLK_GLOBAL_MEM_FENCE);\n",
" }\nmem_fence(CLK_GLOBAL_MEM_FENCE);\n",
" }\nmem_fence(CLK_GLOBAL_MEM_FENCE);\n",
" }\nmem_fence(CLK_GLOBAL_MEM_FENCE);\n",
" }\nmem_fence(CLK_GLOBAL_MEM_FENCE);\n",
"}\n",
反汇编:
00DE467C mov dword ptr ds:[016EBC68h],eax
名为 Hierarchization 的那个内核无法正常工作。出现异常的调用是:Status = clEnqueueNDRangeKernel(CommandQueue,Hierarchization,2,NULL,GlobalThread3,LocalThread3,0,NULL,NULL); 相关的 9 个 clSetKernelArg 调用全部返回 CL_SUCCESS。我实在找不到问题所在。这个程序一共运行 5 个内核,这是第 3 个。其他内核都没有出现异常,前两个内核也能给出我想要的正确结果。
显卡:GT750M。我使用的是 GDK v7.5。请帮帮我,谢谢!新年快乐!
我解决了!
在文件“DrawingProcess.cpp”中。
第 20 行。
将“cl_mem BufIdx[12];”改为“cl_mem BufIdx[14];”。(推测原因:原数组容量不足,越界访问破坏了内存,从而触发了上述异常。)
我运行自己的 OpenCL 程序时,clEnqueueNDRangeKernel 总是抛出异常,尽管其他所有调用都返回了 CL_SUCCESS。
******.exe 中 0x55A7BAB2 (nvopencl.dll) 的未处理异常:0xC000041D:在用户回调期间遇到未处理的异常。
这是我在 GitHub 上的代码链接:Source Code
文件“DrawingProcess.cpp”第 955 行出现异常。我的内核位于“Renderer.h”中,请查看名为“Hierarchization”的内核。
内核
"__kernel void Hierarchization(\n",
"__global ushort2 *ScrPos,\n",
"const float4 CamPos,\n",
"const float4 CamNorV1,\n", //W represents horizontal resolution.
"const float4 CamNorV3,\n", //W represents diagonal resolution.
"__global float4 *AllVert,\n",
"__global ushort4 *DltIdx,\n",
"__global float4 *PixVect,\n",
"__global bool *Polar,\n",
"__global int *TopDltIdx)\n",
"{\n",
" float Deep[SCOUNTER1*SCOUNTER2],Distance[SCOUNTER1*SCOUNTER2];\n",
" int GID1=(int)get_global_id(0);\n",
" int GID2=(int)get_global_id(1);\n",
" int2 v,u;\n",
" int a[SCOUNTER1*SCOUNTER2],b[SCOUNTER1*SCOUNTER2];\n",
" float4 m,n,l;\n",
" float4 p1[SCOUNTER1*SCOUNTER2],p2[SCOUNTER1*SCOUNTER2];\n",
" float t[SCOUNTER1*SCOUNTER2];",
" float4 focus[SCOUNTER1*SCOUNTER2];",
" Deep[GID1+SCOUNTER1*GID2]=-1;\n",
" for(uint i=0;i<TCOUNTER;i++){\n",
" if(Polar[i]==true){\n",
" v.xy=(int)(ScrPos[DltIdx[i].y].xy-ScrPos[DltIdx[i].x].xy);\n",
" u.xy=(int)(ScrPos[DltIdx[i].z].xy-ScrPos[DltIdx[i].x].xy);\n",
// Simplify: Px=Vx*a+Ux*b,
// Py=Vy*a+Uy*b.
" b[GID1+SCOUNTER1*GID2]=(GID2*v.x-GID1*v.y)/(u.y*v.x-u.x*v.y);\n",
" a[GID1+SCOUNTER1*GID2]=(GID1-u.x*b[GID1+SCOUNTER1*GID2])/v.x;\nmem_fence(CLK_GLOBAL_MEM_FENCE);\n",
" if(a[GID1+SCOUNTER1*GID2]>=0&&b[GID1+SCOUNTER1*GID2]>=0&&a[GID1+SCOUNTER1*GID2]+b[GID1+SCOUNTER1*GID2]<=1){\n",
// Plane Equation
" m=AllVert[DltIdx[i].y]-AllVert[DltIdx[i].x];\n",
" n=AllVert[DltIdx[i].z]-AllVert[DltIdx[i].x];\n",
" l=cross(m,n);\n",
" l.w=l.x*(-m.x)+l.y*(-m.y)+l.z*(-m.z);\n",
// Two Points in Linear Equation
" p1[GID1+SCOUNTER1*GID2]=CamPos;\n",
" p2[GID1+SCOUNTER1*GID2]=p1[GID1+SCOUNTER1*GID2]+PixVect[GID1+SCOUNTER1*GID2];",
// (x-x1)(x2-x1)=(y-y1)(y2-y1)=(z-z1)(z2-z1)=t
// ax+by+cz+d=0
" t[GID1+SCOUNTER1*GID2]=(l.x*p1[GID1+SCOUNTER1*GID2].x+l.y*p1[GID1+SCOUNTER1*GID2].y+l.z*p1[GID1+SCOUNTER1*GID2].z+l.w)/(l.x*(p1[GID1+SCOUNTER1*GID2].x-p2[GID1+SCOUNTER1*GID2].x)+l.y*(p1[GID1+SCOUNTER1*GID2].y-p2[GID1+SCOUNTER1*GID2].y)+l.z*(p1[GID1+SCOUNTER1*GID2].z-p2[GID1+SCOUNTER1*GID2].z));\n",
" focus[GID1+SCOUNTER1*GID2]=(p2[GID1+SCOUNTER1*GID2]-p1[GID1+SCOUNTER1*GID2])*t[GID1+SCOUNTER1*GID2]+p1[GID1+SCOUNTER1*GID2];\n",
" p1[GID1+SCOUNTER1*GID2].w=0;\n",
" focus[GID1+SCOUNTER1*GID2].w=0;\n",
" Distance[GID1+SCOUNTER1*GID2]=fast_distance(focus[GID1+SCOUNTER1*GID2],p1[GID1+SCOUNTER1*GID2]);\nmem_fence(CLK_GLOBAL_MEM_FENCE);\n",
" if(Deep[GID1+SCOUNTER1*GID2]<0||Deep[GID1+SCOUNTER1*GID2]>Distance[GID1+SCOUNTER1*GID2]){\n"
" TopDltIdx[GID1+SCOUNTER1*GID2]=i;\nmem_fence(CLK_GLOBAL_MEM_FENCE);\n",
" }\nmem_fence(CLK_GLOBAL_MEM_FENCE);\n",
" }\nmem_fence(CLK_GLOBAL_MEM_FENCE);\n",
" }\nmem_fence(CLK_GLOBAL_MEM_FENCE);\n",
" }\nmem_fence(CLK_GLOBAL_MEM_FENCE);\n",
"}\n",
反汇编:
00DE467C mov dword ptr ds:[016EBC68h],eax
名为 Hierarchization 的那个内核无法正常工作。出现异常的调用是:Status = clEnqueueNDRangeKernel(CommandQueue,Hierarchization,2,NULL,GlobalThread3,LocalThread3,0,NULL,NULL); 相关的 9 个 clSetKernelArg 调用全部返回 CL_SUCCESS。我实在找不到问题所在。这个程序一共运行 5 个内核,这是第 3 个。其他内核都没有出现异常,前两个内核也能给出我想要的正确结果。
显卡:GT750M。我使用的是 GDK v7.5。请帮帮我,谢谢!新年快乐!
我解决了!
在文件“DrawingProcess.cpp”的第 20 行,将“cl_mem BufIdx[12];”改为“cl_mem BufIdx[14];”。(推测原因:原数组容量不足,越界访问破坏了内存,从而触发了上述异常。)