为指针引用的结构的动态分配的结构成员数组赋值时出现 openacc 错误

openacc error when assigning values to dynamically allocated struct member array of struct referenced by pointer

我在将 OpenACC 与指向含有动态分配成员的结构体的指针结合使用时遇到了困难。下面的代码运行失败,报错如下:

Failing in Thread:1 call to cuStreamSynchronize returned error 700: Illegal address during kernel execution

该错误出现在使用 nvc 编译时(“nvc 20.9-0 LLVM 64-bit target on x86-64 Linux -tp haswell”)。据我所知,我遵循的是 OpenACC “getting started” 指南中建议的做法。但不知为何,指针在设备上似乎没有保持有效(?)。有人知道这里出了什么问题吗?

#include <stdlib.h>
#include <stdio.h>

/* Grid descriptor: an element count plus a dynamically allocated array. */
typedef struct grid
{
  int N;      /* number of elements in X (set by allocate()) */
  double *X;  /* heap array of N doubles (malloc'd in allocate()) */
} grid;

/*
 * Initialize *g for N elements on the host and create the matching device
 * data.
 *
 * g: grid to initialize; g->N and g->X are overwritten.
 * N: number of doubles to allocate for g->X.
 *
 * The malloc result is not checked. Both the struct and its member array are
 * entered with `create`; this is the version from the question that the
 * surrounding text reports as failing.
 */
void allocate(grid* g, int N)
{
  g->N = N;
  g->X = (double*) malloc(sizeof(double) * g->N);

  #pragma acc enter data create(g[0:1])
  #pragma acc enter data create(g->X[0:N])
}

/*
 * Undo allocate(): delete the device data for the member array, then for the
 * struct itself, and finally free the host array.
 *
 * g: grid previously set up by allocate(); g->X is NULL on return.
 */
void release(grid* g)
{
  #pragma acc exit data delete(g->X[0:g->N])
  #pragma acc exit data delete(g[0:1])

  free(g->X);
  g->X = NULL; /* guard against accidental reuse or double free */
}


/*
 * Set every element of g->X to 42 inside an OpenACC parallel loop.
 *
 * g: grid set up by allocate().
 *
 * This is the failing version from the question: the compute construct has
 * no data clause for g, and the run aborts with the "Illegal address during
 * kernel execution" error quoted in the surrounding text.
 */
void fill(grid * g)
{
  int i;

  #pragma acc parallel loop
  for (i = 0; i < g->N; i++)
  {
    g->X[i] = 42; // the culprit, commenting this removes the error too
  }
}

/*
 * Driver for the reproducer: allocate a 10-element grid, fill it on the
 * device, then release host and device storage.
 */
int main()
{
  grid g;

  allocate(&g, 10);

  fill(&g);

  release(&g);

  return 0;
}


          32, Accelerator restriction: size of the GPU copy of g is unknown
              Generating Tesla code
              32, #pragma acc loop gang, vector(128) /* blockIdx.x threadIdx.x */
          32, Generating implicit copyin(g) [if not already present]
          37, Generating update self(g->X[:g->N])

问题在于编译器无法隐式复制含有动态数据成员的聚合类型,因此您需要添加 “present(g)”,以表明 g 已经在设备上。

此外,您需要用 copyin(而不是 create)来复制 g,这样设备上才能得到 N 的值;另外,exit data delete 指令中无需包含数组形状。例如:

% cat test.c
#include <stdlib.h>
#include <stdio.h>

/* Grid descriptor: an element count plus a dynamically allocated array. */
typedef struct grid
{
  int N;      /* number of elements in X (set by allocate()) */
  double *X;  /* heap array of N doubles (malloc'd in allocate()) */
} grid;

/*
 * Initialize *g for N elements on the host and set up the matching device
 * data.
 *
 * g: grid to initialize; g->N and g->X are overwritten.
 * N: number of doubles to allocate for g->X.
 *
 * The struct is entered with `copyin` so that the value of N is available on
 * the device; the member array is only created there. The malloc result is
 * not checked.
 */
void allocate(grid* g, int N)
{
  g->N = N;
  g->X = (double*) malloc(sizeof(double) * g->N);

  #pragma acc enter data copyin(g[0:1])
  #pragma acc enter data create(g->X[0:N])
}

/*
 * Undo allocate(): delete the device data for the member array, then for the
 * struct itself, and finally free the host array.
 *
 * g: grid previously set up by allocate(); g->X is NULL on return.
 *
 * No array shape is given in the delete clauses.
 */
void release(grid* g)
{
  #pragma acc exit data delete(g->X)
  #pragma acc exit data delete(g)

  free(g->X);
  g->X = NULL; /* guard against accidental reuse or double free */
}


/*
 * Set every element of g->X to 42 on the device, copy the array back to the
 * host, and print up to the first four values.
 *
 * g: grid set up by allocate(); present(g) tells the compiler that g is
 *    already on the device, so no implicit copy of the struct is attempted.
 */
void fill(grid * g)
{
  int i;

  #pragma acc parallel loop present(g)
  for (i = 0; i < g->N; i++)
  {
    g->X[i] = 42; // the culprit in the original version (before present(g))
  }
  #pragma acc update self(g->X[:g->N])
  /* Print at most four entries, never reading past the N allocated ones. */
  for (i = 0; i < 4 && i < g->N; i++)
  {
    printf("%d : %f \n",i,g->X[i]);
  }
}

/*
 * Driver for the corrected example: allocate a 10-element grid, fill it on
 * the device (which also prints the first values), then release host and
 * device storage.
 */
int main()
{
  grid g;

  allocate(&g, 10);

  fill(&g);

  release(&g);

  return 0;
}

% nvc -acc test.c -Minfo=accel -V20.9 ; a.out
     17, Generating enter data copyin(g[:1])
         Generating enter data create(g->X[:N])
     24, Generating exit data delete(g[:1],g->X[:1])
     32, Generating present(g[:1])
         Generating Tesla code
         32, #pragma acc loop gang, vector(128) /* blockIdx.x threadIdx.x */
     37, Generating update self(g->X[:g->N])
0 : 42.000000
1 : 42.000000
2 : 42.000000
3 : 42.000000