使用 GtkGLArea 的 OpenCL / OpenGL Interop

OpenCL / OpenGL Interop using GtkGLArea

我知道我似乎只在需要时才去拜访,但我还有一个问题。

在我的 的基础上,我成功地 运行 了一个基本的 OpenGL 程序。我现在正在添加 OpenCL 互操作和一个简单的 CL 脚本,每次渲染时都会将三角形缩小一点。

我得到的只是一个空白屏幕。注释掉获取和释放 GL objects 的部分允许渲染像以前一样工作。只是未能集成 OpenCL 组件。

opencl.h 只是一个带有辅助函数的 header,我用它来注册所有 CL object 并在最后通过单个函数调用释放它们。

编辑:2015 年 6 月 12 日 取消注释显示更多信息的 /*fprintf(stderr, "ERROR: " x " failed %d\n", cl_stat);*/ 行:

ERROR: Set cl kernel arg failed -38
ERROR: Acquiring GL objects failed -5

根据 this list,OpenCL 错误代码 -38 表示它是无效内存 object,然后在尝试 re-acquire GL objects 时出现资源不足错误.

这里是main.c

#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#include <glib.h>

#include <gdk/gdkx.h>
#include <epoxy/glx.h>
#include <epoxy/gl.h>
#include <gtk/gtk.h>

#include <CL/opencl.h>
#include "cl_utils.h"

#define IGNORE_VAR(type, identifier) \
{ \
  type IGNORED_VARIABLE_abcd = identifier; \
  identifier = IGNORED_VARIABLE_abcd; \
}

#define CL_ASSERT(x) \
  /*if(print_info) \
    printf(x "...\n"); */\
  if(cl_stat != CL_SUCCESS) \
  { \
    /*fprintf(stderr, "ERROR: " x " failed %d\n", cl_stat);*/ \
    goto exception; \
  }

const GLchar *vert_src = "\n" \
"#version 330                                  \n" \
"#extension GL_ARB_explicit_attrib_location: enable  \n" \
"                                              \n" \
"layout(location = 0) in vec2 in_position;     \n" \
"                                              \n" \
"void main()                                   \n" \
"{                                             \n" \
"  gl_Position = vec4(in_position, 0.0, 1.0);  \n" \
"}                                             \n";

const GLchar *frag_src = "\n" \
"void main (void)                              \n" \
"{                                             \n" \
"  gl_FragColor = vec4(1.0, 1.0, 1.0, 1.0);    \n" \
"}                                             \n";

const char *cl_src = "\n" \
"typedef struct Point{                         \n" \
"  float x;                                    \n" \
"  float y;                                    \n" \
"} Point;                                      \n" \
"                                              \n" \
"__kernel void cl_func(__global Point* point)  \n" \
"{                                             \n" \
"  const int i = get_global_id(0);             \n" \
"  const float d = 0.99;                       \n" \
"                                              \n" \
"  if(i>=3)                                    \n" \
"    return;                                   \n" \
"                                              \n" \
"  point[i].x = point[i].x * d;                \n" \
"  point[i].y = point[i].y * d;                \n" \
"}                                             \n";

struct cl
{
  clu_object_stack* stack;

  cl_platform_id* platform;
  cl_uint num_platforms;

  cl_device_id* device;
  cl_uint num_devices;

  cl_context context;
  cl_context_properties properties[7];

  cl_command_queue queue;

  cl_program program;
  cl_kernel kernel;

  cl_mem buffer;
} cl;

struct cl cl;

GLuint gl_vao, gl_buffer, gl_program;

cl_int init_cl(GtkGLArea *area)
{
  cl_int cl_stat;

  cl.stack = cluCreateObjectStack(44);

  cl_stat = clGetPlatformIDs(0, NULL, &cl.num_platforms);
  CL_ASSERT("Got number of platforms");

  cl.platform = malloc((size_t)cl.num_platforms * sizeof(cl_platform_id));
  cl_stat = clGetPlatformIDs(cl.num_platforms, cl.platform, NULL);
  CL_ASSERT("Got platforms");

  cl_stat = clGetDeviceIDs(cl.platform[0], CL_DEVICE_TYPE_GPU, 0, NULL, &cl.num_devices);
  CL_ASSERT("Got number of devices");
  printf("Number of GPU devices: %d\n", cl.num_devices);

  if(cl.num_devices == 0)
  {
    fprintf(stderr, "Num devices cannot be 0\n");
    goto exception;
  }

  cl.device = malloc((size_t)cl.num_devices * sizeof(cl_device_id));
  cl_stat = clGetDeviceIDs(cl.platform[0], CL_DEVICE_TYPE_GPU, cl.num_devices, cl.device, NULL);
  CL_ASSERT("Got devices");

  if(cl.device == NULL)
  {
    fprintf(stderr, "Devices list is NULL\n");
    goto exception;
  }

  gtk_gl_area_make_current (area);
  cl.properties[0] = CL_GL_CONTEXT_KHR;
  cl.properties[1] = (cl_context_properties) glXGetCurrentContext();
  cl.properties[2] = CL_GLX_DISPLAY_KHR;
  cl.properties[3] = (cl_context_properties) glXGetCurrentDisplay();
  cl.properties[4] = CL_CONTEXT_PLATFORM;
  cl.properties[5] = (cl_context_properties) cl.platform[0];
  cl.properties[6] = 0;

  cl.context = cluCreateContext(cl.stack, cl.properties, cl.num_devices, cl.device, NULL, NULL, &cl_stat);
  CL_ASSERT("Created cl context");

  cl.queue = cluCreateCommandQueue(cl.stack, cl.context, cl.device[0], 0, &cl_stat);
  CL_ASSERT("Created command queue");

  cl.buffer = cluCreateFromGLBuffer(cl.stack, cl.context, CL_MEM_WRITE_ONLY, gl_buffer, NULL);
  CL_ASSERT("Created cl memory object from gl buffer");

  cl.program = cluCreateProgramWithSource(cl.stack, cl.context, 1, (const char **)&cl_src, NULL, &cl_stat);
  CL_ASSERT("Created cl program object");

  cl_stat = clBuildProgram(cl.program, cl.num_devices, cl.device, NULL, NULL, NULL);
  if(cl_stat != CL_SUCCESS)
  {
    size_t ret_size;
    clGetProgramBuildInfo(cl.program, cl.device[0], CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_size);
    char e_str[ret_size];
    clGetProgramBuildInfo(cl.program, cl.device[0], CL_PROGRAM_BUILD_LOG, ret_size, e_str, &ret_size);
    printf("%s\n", e_str);
  }
  CL_ASSERT("Built cl program object");

  cl.kernel = cluCreateKernel(cl.stack, cl.program, "cl_func", &cl_stat);
  CL_ASSERT("Created cl kernel object");

  return 0;
exception:
  return 1;
}

static gboolean realise(GtkGLArea *area, GdkGLContext *context);
static gboolean render(GtkGLArea *area, GdkGLContext *context);

int main(int argc, char** argv)
{
  gtk_init(&argc, &argv);

  GtkWidget *window  = gtk_window_new(GTK_WINDOW_TOPLEVEL),
            *gl_area = gtk_gl_area_new();

  g_signal_connect(window,  "delete-event", G_CALLBACK(gtk_main_quit), NULL);
  g_signal_connect(gl_area, "realize",      G_CALLBACK(realise),       NULL);
  g_signal_connect(gl_area, "render",       G_CALLBACK(render),        NULL);

  gtk_container_add(GTK_CONTAINER(window), gl_area);

  gtk_widget_show_all(window);

  gtk_main();

  cluFreeObjectStack(cl.stack);
  free(cl.platform);
  free(cl.device);

  return 0;
}

static gboolean realise(GtkGLArea *area, GdkGLContext *context)
{
  IGNORE_VAR(GdkGLContext*, context);

  gtk_gl_area_make_current(GTK_GL_AREA(area));
  if (gtk_gl_area_get_error (GTK_GL_AREA(area)) != NULL)
  {
    printf("Failed to initialiize buffers\n");
    return FALSE;
  }

  GLfloat verts[] = 
  {
    +0.0f, +1.0f,
    -1.0f, -1.0f,
    +1.0f, -1.0f,
  };

  GLuint frag_shader, vert_shader;
  frag_shader = glCreateShader(GL_FRAGMENT_SHADER);
  vert_shader = glCreateShader(GL_VERTEX_SHADER);

  glShaderSource(frag_shader, 1, &frag_src, NULL);
  glShaderSource(vert_shader, 1, &vert_src, NULL);

  glCompileShader(frag_shader);
  glCompileShader(vert_shader);

  gl_program = glCreateProgram();
  glAttachShader(gl_program, frag_shader);
  glAttachShader(gl_program, vert_shader);
  glLinkProgram(gl_program);

  glGenBuffers(1, &gl_buffer);
  glBindBuffer(GL_ARRAY_BUFFER, gl_buffer);
  glBufferData(GL_ARRAY_BUFFER, sizeof(verts), verts, GL_DYNAMIC_DRAW);

  glGenVertexArrays(1, &gl_vao);
  glBindVertexArray(gl_vao);

  glEnableVertexAttribArray(0);
  glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 0, (void*)0);
  glBindVertexArray(0);

  //glDeleteBuffers(1, &gl_buffer);
  if(init_cl(area))
    return FALSE;

  return TRUE;
}

static gboolean render(GtkGLArea *area, GdkGLContext *context)
{
  IGNORE_VAR(GdkGLContext*, context);
  IGNORE_VAR(GtkGLArea*, area);
  cl_int cl_stat;

  glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
  glClearColor(0.0, 0.0, 0.0, 1.0);

  glUseProgram(gl_program);
  glBindVertexArray(gl_vao);
  glDrawArrays(GL_TRIANGLES, 0, 3);

  glBindVertexArray (0);
  glUseProgram (0);

  glFinish();

  cl_stat = clEnqueueAcquireGLObjects(cl.queue, 1, &cl.buffer, 0, NULL, NULL);
  CL_ASSERT("Acquiring GL objects");

  cl_stat = clSetKernelArg(cl.kernel, 0, sizeof(cl_mem), (const void*)cl.buffer);
  CL_ASSERT("Set cl kernel arg");

  //size_t g_sz[1] = { 32 };
  //cl_stat = clEnqueueNDRangeKernel(cl.queue, cl.kernel, 1, NULL, g_sz, NULL, 0, NULL, NULL);
  cl_stat = clEnqueueTask(cl.queue, cl.kernel, 0, NULL, NULL);
  CL_ASSERT("Executing cl kernel");
  cl_stat = clEnqueueReleaseGLObjects(cl.queue, 1, &cl.buffer, 0, NULL, NULL);
  CL_ASSERT("Releasing GL buffer");
  clFinish(cl.queue);

  return TRUE;

exception:
  return FALSE;
}

SConstruct

import os

CFLAGS = '--std=c11 -g -Wall'
ENV = {'PATH':os.environ['PATH']}

env = Environment(CFLAGS=CFLAGS, ENV=ENV)

if os.name is 'posix':
  env['ENV']['TERM'] = os.environ['TERM']
  env.ParseConfig('pkg-config --cflags --libs gtk+-3.0')
  env.Append(LIBS = ['epoxy', 'GL', 'OpenCL'])
  env.Program(target='gl', source=['main.c', 'cl_utils.c'])

# vim: set filetype=python:

cl_utils.c

#include <stdlib.h>
#include <stdio.h>

#include "cl_utils.h"

#define FUNC __func__

#define CL_ASSERT(object, label) \
  if(!object) \
  { \
    fprintf(stderr, "CL_ERROR: failed to create CL %s object: %d\n", #object, *cl_stat); \
    goto label; \
  } \
  else \
  { \
    printf("CL_INFO: created cl_%s object\n", #object); \
  }

#define PLATFORM_INFO(_) \
    _(PROFILE) \
    _(VERSION) \
    _(NAME) \
    _(VENDOR) \
    _(EXTENSIONS)

const char* space = " ";

#define TO_STRING(TYPE, ENUM, ...) [ENUM] = #TYPE,
const char* CLU_TYPE_STRING[] = { CLU_TABLE(TO_STRING) };
#undef TO_STRING

#define TO_STRING(x) [CL_PLATFORM_##x] = #x,

const char* platform_info_string[] = { PLATFORM_INFO(TO_STRING) };
#undef TO_STRING

const char* clGetPlatformInfoString(cl_platform_info platform_info)
{
    return platform_info_string[platform_info];
}

cl_int infoPlatforms(cl_platform_id* platforms, cl_int num_platforms, cl_platform_info* params, cl_int num_params)
{
    cl_int cl_stat;
    size_t buffer_size = 10, buffer_ret_size = 0;
    char* buffer = (char*)malloc(buffer_size);

    for(cl_int i = 0; i < num_platforms; i++)
    {
        for(cl_int j = 0; j < num_params; j++)
        {
            cl_stat = clGetPlatformInfo(platforms[i], params[j], 0, NULL, &buffer_ret_size);

            if(cl_stat != CL_SUCCESS)
            {
                fprintf(stderr, "ERROR: clGetPlatformInfo failed\n");
                goto end;
            }

            if(buffer_ret_size > buffer_size)
            {
                void* tmp = NULL;
                buffer_size = buffer_ret_size;
                tmp = realloc(buffer, buffer_size);
                if(tmp == NULL)
                {
                    fprintf(stderr, "ERROR: Could not realloc memory\n");
                    perror("ERROR: ");
                    goto end;
                }
                else
                    buffer = (char*)tmp;
            }

            cl_stat = clGetPlatformInfo(platforms[i], params[j], buffer_size, buffer, &buffer_ret_size);

            if(cl_stat != CL_SUCCESS)
            {
                fprintf(stderr, "ERROR: clGetPlatformInfo failed\n");
                goto end;
            }

            printf("%s: %s\n", clGetPlatformInfoString(params[j]), buffer);
        }
        printf("\n");
    }

end:
    if(cl_stat != CL_SUCCESS)
    {
        printf("SENT TO ERROR HANDLER!\n");
        printf("CL_ERROR: %d\n", cl_stat);
    }

    free(buffer);

    if(cl_stat != CL_SUCCESS)
        return 1;

        return 0;
}

clu_object_stack* cluCreateObjectStack(int init_len)
{
  clu_object_stack* stack = (clu_object_stack*)malloc(sizeof(clu_object_stack));
  stack->length = init_len;
  stack->num_objects = 0;
  stack->list = (clu_object*)malloc(sizeof(clu_object) * init_len);

  return stack;
}

void cluFreeObjectStack(clu_object_stack* stack)
{
  cl_int ret = CL_SUCCESS;

  printf("Freeing stack: %p\n", stack);
  int i;
  for(i=stack->num_objects-1; i>=0; i--)
  {
    switch(stack->list[i].type)
    {
      #define TO_FREE(cl_type, ENUM, element, cluRelease) \
      case ENUM: \
        ret = cluRelease(stack->list[i].element); \
        /*printf("Releasing " #cl_type "\n");*/ \
        break;

      CLU_TABLE(TO_FREE)
      #undef TO_FREE

      default:
        printf("ERROR: Invalid or unsupported object type\n");
        break;
    }

    if(ret != CL_SUCCESS)
    {
      printf("Failed to release %s\n", CLU_TYPE_STRING[stack->list[i].type]);
    }
  }
  free(stack->list);
  free(stack);
}

int cluAssignToObjectGroup(struct clu_object_stack* stack, void* obj, cl_type type)
{
  if(stack->num_objects >= stack->length)
  {
    printf("Stack Error\n");
    return -1;
  }

  stack->list[stack->num_objects].type = type;

  switch(type)
  {
    #define TO_ASSIGN(cl_type, ENUM, element, ...) \
      case ENUM: \
        stack->list[stack->num_objects].element=*(cl_type*)obj; \
        /*printf("Assigning " #cl_type "\n");*/ \
        break;

    CLU_TABLE(TO_ASSIGN)
    #undef TO_ASSIGN

    default:
      printf("cluAssignToObjectGroup Failed\n");
      break;
  }

  stack->num_objects++;

  return 0;
}

cl_context cluCreateContext(
    clu_object_stack *stack,
    cl_context_properties *properties,
    cl_uint num_devices,
    const cl_device_id *devices,
    pfn_notify func,
    void *user_data,
    cl_int* cl_stat)
{

  cl_context context= clCreateContext(properties, num_devices, devices, func, user_data, cl_stat);

  if(context == 0)
    printf("Created ZERO value cl context\n");
  CL_ASSERT(context, error_ret);
  cluAssignToObjectGroup(stack, &context, CLU_CONTEXT);

error_ret:
  if(devices == NULL)
    fprintf(stderr, "%2sdevices cannot be NULL\n", space);
  if(num_devices == 0)
    fprintf(stderr, "%2snum_devices cannot be zero\n", space);
  if((func == NULL) && (user_data != NULL))
    fprintf(stderr, "%2spfn_notify cannot be NULL when user_data is not NULL\n", space);

  return context;
}

cl_command_queue cluCreateCommandQueue(
    clu_object_stack *stack,
    cl_context context,
    cl_device_id device,
    cl_command_queue_properties properties,
    cl_int *cl_stat)
{
  cl_command_queue queue = clCreateCommandQueue(context, device, properties, cl_stat);

  CL_ASSERT(queue, error_ret);
  cluAssignToObjectGroup(stack, &queue, CLU_COMMAND_QUEUE);

error_ret:
  return queue;
}

cl_mem cluCreateBuffer(
    clu_object_stack* stack,
    cl_context context,
    cl_mem_flags flags,
    size_t size,
    void *host_ptr,
    cl_int *cl_stat)
{
  cl_mem buffer = clCreateBuffer(context, flags, size, host_ptr, cl_stat);

  CL_ASSERT(buffer, error_ret);
  cluAssignToObjectGroup(stack, &buffer, CLU_MEM_OBJECT);

error_ret:
  return buffer;
}

cl_program cluCreateProgramWithSource(
    clu_object_stack* stack, 
    cl_context context,
    cl_uint count,
    const char **strings,
    const size_t *lengths,
    cl_int *cl_stat)
{
  cl_program program = clCreateProgramWithSource(context, count, strings, lengths, cl_stat);

  CL_ASSERT(program, error_ret);
  cluAssignToObjectGroup(stack, &program, CLU_PROGRAM);

error_ret:
  return program;
}

cl_kernel cluCreateKernel(
    clu_object_stack* stack,
    cl_program  program,
    const char *kernel_name,
    cl_int *cl_stat)
{
  cl_kernel kernel = clCreateKernel(program, kernel_name, cl_stat);

  CL_ASSERT(kernel, error_ret);
  cluAssignToObjectGroup(stack, &kernel, CLU_KERNEL);

error_ret:
  return kernel;
}

cl_mem cluCreateFromGLBuffer(
    clu_object_stack* stack,
    cl_context context,
    cl_mem_flags flags,
    GLuint bufobj,
    cl_int* cl_stat)
{
  cl_mem gl_buffer = clCreateFromGLBuffer(context, flags, bufobj, cl_stat);

  CL_ASSERT(gl_buffer, error_ret);
  cluAssignToObjectGroup(stack, &gl_buffer, CLU_MEM_OBJECT);

error_ret:
  return gl_buffer;
}

cl_utils.h

#ifndef __CL_UTILS_H__
#define __CL_UTILS_H__

#ifdef __cplusplus
extern "C" {
#endif /* C++ */

#include <stdarg.h>

#if defined(__APPLE__) || defined(MACOSX)
#include <OpenCL/opencl.h>
#else
#include <CL/opencl.h>
#endif

#include <epoxy/gl.h>

#define CLU_TABLE(_) \
  _(cl_context, CLU_CONTEXT, context, clReleaseContext) \
  _(cl_command_queue, CLU_COMMAND_QUEUE, queue, clReleaseCommandQueue) \
  _(cl_mem,CLU_MEM_OBJECT, mem_object, clReleaseMemObject) \
  _(cl_program, CLU_PROGRAM, program, clReleaseProgram) \
  _(cl_kernel, CLU_KERNEL, kernel, clReleaseKernel)

#define TO_ENUM(cl_type, ENUM, ...) ENUM,
typedef enum cl_type{
  CLU_TABLE(TO_ENUM)
} cl_type;
#undef TO_ENUM

typedef void (*pfn_notify)(const char *errinfo, const void *private_info, size_t cb, void *user_data);

//extern const char* CLU_TYPE_STRING[];

typedef struct clu_object clu_object;

struct clu_object{
  cl_type type;
  union{
    cl_context context;
    cl_command_queue queue;
    cl_mem mem_object;
    cl_program program;
    cl_kernel kernel;
  };
};

typedef struct clu_object_stack{
  struct clu_object* list;
  int num_objects;
  int length;
} clu_object_stack;

const char* clGetPlatformInfoString(cl_platform_info platform_info);
int infoPlatforms(cl_platform_id* platforms, cl_int num_platforms, cl_platform_info* params, cl_int num_params);

clu_object_stack* cluCreateObjectStack(int init_len);
void cluFreeObjectStack(clu_object_stack* stack);

int cluAssignToObjectGroup(struct clu_object_stack* stack, void* obj, cl_type type);

cl_context cluCreateContext(
    clu_object_stack *stack,
    cl_context_properties *properties,
    cl_uint num_devices,
    const cl_device_id *devices,
    pfn_notify func,
    void *user_data,
    cl_int* cl_stat);

cl_command_queue cluCreateCommandQueue(
    clu_object_stack* stack,
    cl_context context,
    cl_device_id device,
    cl_command_queue_properties properties,
    cl_int *cl_stat);

cl_mem cluCreateBuffer(
    clu_object_stack* stack,
    cl_context context,
    cl_mem_flags flags,
    size_t size,
    void *host_ptr,
    cl_int * cl_stat);

cl_program cluCreateProgramWithSource(
    clu_object_stack* stack, 
    cl_context context,
    cl_uint count,
    const char **strings,
    const size_t *lengths,
    cl_int *cl_stat);

cl_kernel cluCreateKernel(
    clu_object_stack* stack,
    cl_program  program,
    const char *kernel_name,
    cl_int *cl_stat);

cl_mem cluCreateFromGLBuffer(
    clu_object_stack* stack,
    cl_context context,
    cl_mem_flags flags,
    GLuint bufobj,
    cl_int* cl_stat);

#ifdef __cplusplus
}
#endif /* C++ */

#endif /* __CL_UTILS_H__ */

要启用 CL-GL 互操作性,必须按特定顺序进行设置:

 1. Create OpenGL context
 2. Create OpenCL context
 3. Create OpenGL buffers
 4. Start OpenGL rendering

一个可能的问题是您在 OpenCL 上下文之前(在调用 init_cl 之前)创建了 OpenGL 缓冲区。

找到问题了。 clSetKernelArg() 的最后一个参数需要一个指向 mem 对象的指针,我忘了在前面添加 & 运算符。

所以这个:

cl_stat = clSetKernelArg(cl.kernel, 0, sizeof(cl_mem), (const void*)cl.buffer);

变成这样:

cl_stat = clSetKernelArg(cl.kernel, 0, sizeof(cl_mem), (const void*)&cl.buffer);

很简单。