向通过指针引用的结构体中动态分配的成员数组赋值时出现 OpenACC 错误
openacc error when assigning values to dynamically allocated struct member array of struct referenced by pointer
我正在努力将 openacc 与指向包含动态分配成员的结构的指针结合起来。下面的代码失败
Failing in Thread:1
call to cuStreamSynchronize returned error 700: Illegal address during kernel execution
上述错误出现在使用 nvc 编译之后(编译器版本为 "nvc 20.9-0 LLVM 64-bit target on x86-64 Linux -tp haswell")。据我所知,我遵循的是 OpenACC "Getting Started" 指南中建议的做法。但不知为何,结构体里的指针似乎没有在设备上正确建立(?)。有人知道这里出了什么问题吗?
#include <stdlib.h>
#include <stdio.h>
/* A grid: an element count together with a run-time allocated array of doubles. */
struct grid
{
    int N;      /* number of elements stored in X */
    double *X;  /* pointer to the element storage */
};
typedef struct grid grid;
/*
 * Store N in the grid, allocate host storage for N doubles in g->X, and
 * enter both the struct and the array into OpenACC device data.
 *
 * g: grid to initialise (written through).
 * N: number of doubles to allocate.
 *
 * NOTE(review): this is the failing version of the routine. The struct is
 * entered with create, not copyin, so the host value of N is presumably not
 * transferred to the device copy - confirm against the OpenACC specification.
 * The malloc result is not checked.
 */
void allocate(grid* g, int N)
{
g->N = N;
g->X = (double*) malloc(sizeof(double) * g->N);
/* Struct first, then the array its X member points to. */
#pragma acc enter data create(g[0:1])
#pragma acc enter data create(g->X[0:N])
}
/*
 * Remove the grid's array and the grid struct from OpenACC device data,
 * then free the host array.
 *
 * g: grid previously set up by allocate(); g->X is freed but not reset.
 */
void release(grid* g)
{
/* Array section first, then the struct that holds the pointer to it. */
#pragma acc exit data delete(g->X[0:g->N])
#pragma acc exit data delete(g[0:1])
free(g->X);
}
/*
 * Assign 42 to each of the g->N elements of g->X inside an OpenACC
 * parallel loop.
 *
 * NOTE(review): this is the failing version. The directive carries no data
 * clause for g, so the compiler has to handle g implicitly; the reported
 * run-time error is "Illegal address during kernel execution".
 */
void fill(grid * g)
{
int i;
#pragma acc parallel loop
for (i = 0; i < g->N; i++)
{
g->X[i] = 42; // the culprit: commenting this line out removes the error too
}
}
/* Driver: set up a 10-element grid, fill it, and release it again. */
int main(void)
{
    grid g;

    allocate(&g, 10);
    fill(&g);
    release(&g);
    return 0;
}
从编译器反馈消息中,您会看到如下内容:
fill:
32, Accelerator restriction: size of the GPU copy of g is unknown
Generating Tesla code
32, #pragma acc loop gang, vector(128) /* blockIdx.x threadIdx.x */
32, Generating implicit copyin(g) [if not already present]
37, Generating update self(g->X[:g->N])
问题在于编译器无法隐式复制带有动态数据成员的聚合类型,因此您需要添加 "present(g)",以指明 g 已经存在于设备上。
此外,您需要对 g 使用 copyin(而不是 create),这样设备上才能得到 N 的值;另外,在 exit data delete 指令中不需要写出数组形状。例如:
% cat test.c
#include <stdlib.h>
#include <stdio.h>
/* A grid: an element count together with a run-time allocated array of doubles. */
struct grid
{
    int N;      /* number of elements stored in X */
    double *X;  /* pointer to the element storage */
};
typedef struct grid grid;
/*
 * Store N in the grid, allocate host storage for N doubles in g->X, and
 * enter both the struct and the array into OpenACC device data.
 *
 * g: grid to initialise (written through).
 * N: number of doubles to allocate; must not be negative.
 *
 * A negative N or a failed host allocation is reported on stderr and the
 * program exits with EXIT_FAILURE, so no invalid pointer ever reaches the
 * data directives.
 */
void allocate(grid* g, int N)
{
    if (N < 0)
    {
        fprintf(stderr, "allocate: invalid element count %d\n", N);
        exit(EXIT_FAILURE);
    }

    g->N = N;
    g->X = malloc(sizeof *g->X * (size_t)N);
    /* malloc(0) may legitimately return NULL, so only N > 0 is an error. */
    if (g->X == NULL && N > 0)
    {
        fprintf(stderr, "allocate: out of memory for %d doubles\n", N);
        exit(EXIT_FAILURE);
    }

    /* copyin (not create) so that the value of N is available on the device. */
#pragma acc enter data copyin(g[0:1])
    /* The array only needs device storage; fill() writes its contents. */
#pragma acc enter data create(g->X[0:N])
}
/*
 * Remove the grid's array and the grid struct from OpenACC device data,
 * free the host array, and leave the grid in an empty state.
 *
 * g: grid previously set up by allocate(). On return g->X is NULL and
 *    g->N is 0, so a stale pointer cannot be reused or freed twice.
 */
void release(grid* g)
{
    /* Array first, then the struct that holds the pointer to it. */
#pragma acc exit data delete(g->X)
#pragma acc exit data delete(g)
    free(g->X);
    g->X = NULL;
    g->N = 0;
}
/*
 * Assign 42 to each of the g->N elements of g->X in an OpenACC parallel
 * loop, copy the array back to the host, and print the leading elements.
 *
 * g: grid whose struct and array have already been entered into device
 *    data (see the present clause below).
 *
 * At most the first 4 elements are printed; fewer when the grid is smaller.
 */
void fill(grid * g)
{
    int i;
    int shown;

    /* present(g): g is already on the device, so no implicit copy is attempted. */
#pragma acc parallel loop present(g)
    for (i = 0; i < g->N; i++)
    {
        g->X[i] = 42;
    }

    /* Refresh the host copy of the array before reading it. */
#pragma acc update self(g->X[:g->N])

    /* Never read past the end of a grid that has fewer than 4 elements. */
    shown = (g->N < 4) ? g->N : 4;
    for (i = 0; i < shown; i++)
    {
        printf("%d : %f \n",i,g->X[i]);
    }
}
/* Driver: set up a 10-element grid, fill it, and release it again. */
int main(void)
{
    enum { GRID_POINTS = 10 };
    grid mesh;

    allocate(&mesh, GRID_POINTS);
    fill(&mesh);
    release(&mesh);

    return 0;
}
% nvc -acc test.c -Minfo=accel -V20.9 ; a.out
allocate:
17, Generating enter data copyin(g[:1])
Generating enter data create(g->X[:N])
release:
24, Generating exit data delete(g[:1],g->X[:1])
fill:
32, Generating present(g[:1])
Generating Tesla code
32, #pragma acc loop gang, vector(128) /* blockIdx.x threadIdx.x */
37, Generating update self(g->X[:g->N])
0 : 42.000000
1 : 42.000000
2 : 42.000000
3 : 42.000000
我正在努力将 openacc 与指向包含动态分配成员的结构的指针结合起来。下面的代码失败
Failing in Thread:1 call to cuStreamSynchronize returned error 700: Illegal address during kernel execution
上述错误出现在使用 nvc 编译之后(编译器版本为 "nvc 20.9-0 LLVM 64-bit target on x86-64 Linux -tp haswell")。据我所知,我遵循的是 OpenACC "Getting Started" 指南中建议的做法。但不知为何,结构体里的指针似乎没有在设备上正确建立(?)。有人知道这里出了什么问题吗?
#include <stdlib.h>
#include <stdio.h>
/* A grid: an element count together with a run-time allocated array of doubles. */
struct grid
{
    int N;      /* number of elements stored in X */
    double *X;  /* pointer to the element storage */
};
typedef struct grid grid;
/*
 * Store N in the grid, allocate host storage for N doubles in g->X, and
 * enter both the struct and the array into OpenACC device data.
 *
 * g: grid to initialise (written through).
 * N: number of doubles to allocate.
 *
 * NOTE(review): this is the failing version of the routine. The struct is
 * entered with create, not copyin, so the host value of N is presumably not
 * transferred to the device copy - confirm against the OpenACC specification.
 * The malloc result is not checked.
 */
void allocate(grid* g, int N)
{
g->N = N;
g->X = (double*) malloc(sizeof(double) * g->N);
/* Struct first, then the array its X member points to. */
#pragma acc enter data create(g[0:1])
#pragma acc enter data create(g->X[0:N])
}
/*
 * Remove the grid's array and the grid struct from OpenACC device data,
 * then free the host array.
 *
 * g: grid previously set up by allocate(); g->X is freed but not reset.
 */
void release(grid* g)
{
/* Array section first, then the struct that holds the pointer to it. */
#pragma acc exit data delete(g->X[0:g->N])
#pragma acc exit data delete(g[0:1])
free(g->X);
}
/*
 * Assign 42 to each of the g->N elements of g->X inside an OpenACC
 * parallel loop.
 *
 * NOTE(review): this is the failing version. The directive carries no data
 * clause for g, so the compiler has to handle g implicitly; the reported
 * run-time error is "Illegal address during kernel execution".
 */
void fill(grid * g)
{
int i;
#pragma acc parallel loop
for (i = 0; i < g->N; i++)
{
g->X[i] = 42; // the culprit: commenting this line out removes the error too
}
}
/* Driver: set up a 10-element grid, fill it, and release it again. */
int main(void)
{
    grid g;

    allocate(&g, 10);
    fill(&g);
    release(&g);
    return 0;
}
从编译器反馈消息中,您会看到如下内容:
fill:
32, Accelerator restriction: size of the GPU copy of g is unknown
Generating Tesla code
32, #pragma acc loop gang, vector(128) /* blockIdx.x threadIdx.x */
32, Generating implicit copyin(g) [if not already present]
37, Generating update self(g->X[:g->N])
问题在于编译器无法隐式复制带有动态数据成员的聚合类型,因此您需要添加 "present(g)",以指明 g 已经存在于设备上。
此外,您需要对 g 使用 copyin(而不是 create),这样设备上才能得到 N 的值;另外,在 exit data delete 指令中不需要写出数组形状。例如:
% cat test.c
#include <stdlib.h>
#include <stdio.h>
/* A grid: an element count together with a run-time allocated array of doubles. */
struct grid
{
    int N;      /* number of elements stored in X */
    double *X;  /* pointer to the element storage */
};
typedef struct grid grid;
/*
 * Store N in the grid, allocate host storage for N doubles in g->X, and
 * enter both the struct and the array into OpenACC device data.
 *
 * g: grid to initialise (written through).
 * N: number of doubles to allocate; must not be negative.
 *
 * A negative N or a failed host allocation is reported on stderr and the
 * program exits with EXIT_FAILURE, so no invalid pointer ever reaches the
 * data directives.
 */
void allocate(grid* g, int N)
{
    if (N < 0)
    {
        fprintf(stderr, "allocate: invalid element count %d\n", N);
        exit(EXIT_FAILURE);
    }

    g->N = N;
    g->X = malloc(sizeof *g->X * (size_t)N);
    /* malloc(0) may legitimately return NULL, so only N > 0 is an error. */
    if (g->X == NULL && N > 0)
    {
        fprintf(stderr, "allocate: out of memory for %d doubles\n", N);
        exit(EXIT_FAILURE);
    }

    /* copyin (not create) so that the value of N is available on the device. */
#pragma acc enter data copyin(g[0:1])
    /* The array only needs device storage; fill() writes its contents. */
#pragma acc enter data create(g->X[0:N])
}
/*
 * Remove the grid's array and the grid struct from OpenACC device data,
 * free the host array, and leave the grid in an empty state.
 *
 * g: grid previously set up by allocate(). On return g->X is NULL and
 *    g->N is 0, so a stale pointer cannot be reused or freed twice.
 */
void release(grid* g)
{
    /* Array first, then the struct that holds the pointer to it. */
#pragma acc exit data delete(g->X)
#pragma acc exit data delete(g)
    free(g->X);
    g->X = NULL;
    g->N = 0;
}
/*
 * Assign 42 to each of the g->N elements of g->X in an OpenACC parallel
 * loop, copy the array back to the host, and print the leading elements.
 *
 * g: grid whose struct and array have already been entered into device
 *    data (see the present clause below).
 *
 * At most the first 4 elements are printed; fewer when the grid is smaller.
 */
void fill(grid * g)
{
    int i;
    int shown;

    /* present(g): g is already on the device, so no implicit copy is attempted. */
#pragma acc parallel loop present(g)
    for (i = 0; i < g->N; i++)
    {
        g->X[i] = 42;
    }

    /* Refresh the host copy of the array before reading it. */
#pragma acc update self(g->X[:g->N])

    /* Never read past the end of a grid that has fewer than 4 elements. */
    shown = (g->N < 4) ? g->N : 4;
    for (i = 0; i < shown; i++)
    {
        printf("%d : %f \n",i,g->X[i]);
    }
}
/* Driver: set up a 10-element grid, fill it, and release it again. */
int main(void)
{
    enum { GRID_POINTS = 10 };
    grid mesh;

    allocate(&mesh, GRID_POINTS);
    fill(&mesh);
    release(&mesh);

    return 0;
}
% nvc -acc test.c -Minfo=accel -V20.9 ; a.out
allocate:
17, Generating enter data copyin(g[:1])
Generating enter data create(g->X[:N])
release:
24, Generating exit data delete(g[:1],g->X[:1])
fill:
32, Generating present(g[:1])
Generating Tesla code
32, #pragma acc loop gang, vector(128) /* blockIdx.x threadIdx.x */
37, Generating update self(g->X[:g->N])
0 : 42.000000
1 : 42.000000
2 : 42.000000
3 : 42.000000