使用所有 CPU 和 GPU 设备创建 OpenCL 上下文
Create an OpenCL Context using all CPU and GPU Devices
我正在使用 OpenCL 1.1。我将 运行 我所有的 gpus 和我所有的 cpus 上的代码。因此,由于很难在两个不同的上下文中进行同步,我想创建一个包含所有 CPU 和 GPU 作为设备的上下文。所以,首先我要获取所有平台,然后是与每个平台相关的设备,然后获取 CPU & GPU 设备并将它们存储在单独的向量中。然后,为了制作上下文,我将创建一个由所有 CPU 和 GPU 设备制作的矢量。然后,我会调用clCreateContext
。它可以正常工作,但之后,当我想分别为每个设备创建命令队列时,它总是给我:
OpenCL call falls with error -34.
代码如下:
cl_int error = CL_SUCCESS;
cl_uint num_platforms;
clGetPlatformIDs(0, nullptr, &num_platforms);
if (num_platforms == 0){
std::cout << "Cannot find any platform.\n";
return;
}
platform.resize(num_platforms);
error = clGetPlatformIDs(num_platforms, platform.data(), nullptr);
checkError(error);
for (cl_uint i = 0; i < num_platforms; i++){
std::string platform_name;
size_t platform_name_len;
clGetPlatformInfo(platform[i], CL_PLATFORM_NAME, 0, nullptr, &platform_name_len);
platform_name.resize(platform_name_len);
clGetPlatformInfo(platform[i], CL_PLATFORM_NAME, platform_name_len, const_cast<char*>(platform_name.data()), nullptr);
std::cout << "[" << i << "]\t" << platform_name << std::endl;
std::vector<cl_device_id> devices(0);
cl_uint num_cpus = 0, num_gpus = 0;
error = clGetDeviceIDs(platform[i], CL_DEVICE_TYPE_CPU, 0, nullptr, &num_cpus);
error = clGetDeviceIDs(platform[i], CL_DEVICE_TYPE_GPU, 0, nullptr, &num_gpus);
devices.resize(num_cpus);
std::cout << "\tCPUS: \n";
error = clGetDeviceIDs(platform[i], CL_DEVICE_TYPE_CPU, num_cpus, devices.data(), nullptr);
for (cl_uint d = 0; d < num_cpus; d++){
std::string device_name;
size_t device_name_len;
clGetDeviceInfo(devices[d], CL_DEVICE_NAME, 0, nullptr, &device_name_len);
device_name.resize(device_name_len);
clGetDeviceInfo(devices[d], CL_DEVICE_NAME, device_name_len, const_cast<char*>(device_name.data()), nullptr);
std::cout << "\t\t[" << d << "]\t" << device_name << std::endl;
cpu_devices.push_back(devices[d]);
}
std::cout << "\tGPUS: \n";
devices.resize(num_gpus);
error = clGetDeviceIDs(platform[i], CL_DEVICE_TYPE_GPU, num_gpus, devices.data(), nullptr);
for (cl_uint d = 0; d < num_gpus; d++){
std::string device_name;
size_t device_name_len;
clGetDeviceInfo(devices[d], CL_DEVICE_NAME, 0, nullptr, &device_name_len);
device_name.resize(device_name_len);
clGetDeviceInfo(devices[d], CL_DEVICE_NAME, device_name_len, const_cast<char*>(device_name.data()), nullptr);
std::cout << "\t\t[" << d << "]\t" << device_name << std::endl;
gpu_devices.push_back(devices[d]);
}
}
std::vector<cl_device_id> devices;
for (size_t i = 0; i < cpu_devices.size(); i++)
devices.push_back(cpu_devices[i]);
for (size_t i = 0; i < gpu_devices.size(); i++)
devices.push_back(gpu_devices[i]);
ctx = clCreateContext(NULL, static_cast<cl_uint>(devices.size()), devices.data(), nullptr, nullptr, nullptr);
cpu_devices_queue.resize(cpu_devices.size());
for (size_t i = 0; i < cpu_devices.size(); i++){
cpu_devices_queue[i] = clCreateCommandQueue(ctx, cpu_devices[i], 0, &error);
checkError(error);
}
gpu_devices_queue.resize(gpu_devices.size());
for (size_t i = 0; i < gpu_devices.size(); i++){
gpu_devices_queue[i] = clCreateCommandQueue(ctx, gpu_devices[i], 0, &error);
checkError(error);
}
OpenCL 上下文只能封装来自单一平台的设备,不能使用来自两个或多个不同平台的设备创建。
您实际上并没有检查对 clCreateContext
的调用是否成功。如果您检查 return 值或错误代码,您可能会发现它实际上失败了。这就是为什么当您稍后在对 clCreateCommandQueue
的调用中使用该上下文时,您会收到错误 -34 (CL_INVALID_CONTEXT
).
我正在使用 OpenCL 1.1。我将 运行 我所有的 gpus 和我所有的 cpus 上的代码。因此,由于很难在两个不同的上下文中进行同步,我想创建一个包含所有 CPU 和 GPU 作为设备的上下文。所以,首先我要获取所有平台,然后是与每个平台相关的设备,然后获取 CPU & GPU 设备并将它们存储在单独的向量中。然后,为了制作上下文,我将创建一个由所有 CPU 和 GPU 设备制作的矢量。然后,我会调用clCreateContext
。它可以正常工作,但之后,当我想分别为每个设备创建命令队列时,它总是给我:
OpenCL call falls with error -34.
代码如下:
cl_int error = CL_SUCCESS;
cl_uint num_platforms;
clGetPlatformIDs(0, nullptr, &num_platforms);
if (num_platforms == 0){
std::cout << "Cannot find any platform.\n";
return;
}
platform.resize(num_platforms);
error = clGetPlatformIDs(num_platforms, platform.data(), nullptr);
checkError(error);
for (cl_uint i = 0; i < num_platforms; i++){
std::string platform_name;
size_t platform_name_len;
clGetPlatformInfo(platform[i], CL_PLATFORM_NAME, 0, nullptr, &platform_name_len);
platform_name.resize(platform_name_len);
clGetPlatformInfo(platform[i], CL_PLATFORM_NAME, platform_name_len, const_cast<char*>(platform_name.data()), nullptr);
std::cout << "[" << i << "]\t" << platform_name << std::endl;
std::vector<cl_device_id> devices(0);
cl_uint num_cpus = 0, num_gpus = 0;
error = clGetDeviceIDs(platform[i], CL_DEVICE_TYPE_CPU, 0, nullptr, &num_cpus);
error = clGetDeviceIDs(platform[i], CL_DEVICE_TYPE_GPU, 0, nullptr, &num_gpus);
devices.resize(num_cpus);
std::cout << "\tCPUS: \n";
error = clGetDeviceIDs(platform[i], CL_DEVICE_TYPE_CPU, num_cpus, devices.data(), nullptr);
for (cl_uint d = 0; d < num_cpus; d++){
std::string device_name;
size_t device_name_len;
clGetDeviceInfo(devices[d], CL_DEVICE_NAME, 0, nullptr, &device_name_len);
device_name.resize(device_name_len);
clGetDeviceInfo(devices[d], CL_DEVICE_NAME, device_name_len, const_cast<char*>(device_name.data()), nullptr);
std::cout << "\t\t[" << d << "]\t" << device_name << std::endl;
cpu_devices.push_back(devices[d]);
}
std::cout << "\tGPUS: \n";
devices.resize(num_gpus);
error = clGetDeviceIDs(platform[i], CL_DEVICE_TYPE_GPU, num_gpus, devices.data(), nullptr);
for (cl_uint d = 0; d < num_gpus; d++){
std::string device_name;
size_t device_name_len;
clGetDeviceInfo(devices[d], CL_DEVICE_NAME, 0, nullptr, &device_name_len);
device_name.resize(device_name_len);
clGetDeviceInfo(devices[d], CL_DEVICE_NAME, device_name_len, const_cast<char*>(device_name.data()), nullptr);
std::cout << "\t\t[" << d << "]\t" << device_name << std::endl;
gpu_devices.push_back(devices[d]);
}
}
std::vector<cl_device_id> devices;
for (size_t i = 0; i < cpu_devices.size(); i++)
devices.push_back(cpu_devices[i]);
for (size_t i = 0; i < gpu_devices.size(); i++)
devices.push_back(gpu_devices[i]);
ctx = clCreateContext(NULL, static_cast<cl_uint>(devices.size()), devices.data(), nullptr, nullptr, nullptr);
cpu_devices_queue.resize(cpu_devices.size());
for (size_t i = 0; i < cpu_devices.size(); i++){
cpu_devices_queue[i] = clCreateCommandQueue(ctx, cpu_devices[i], 0, &error);
checkError(error);
}
gpu_devices_queue.resize(gpu_devices.size());
for (size_t i = 0; i < gpu_devices.size(); i++){
gpu_devices_queue[i] = clCreateCommandQueue(ctx, gpu_devices[i], 0, &error);
checkError(error);
}
OpenCL 上下文只能封装来自单一平台的设备,不能使用来自两个或多个不同平台的设备创建。
您实际上并没有检查对 clCreateContext
的调用是否成功。如果您检查 return 值或错误代码,您可能会发现它实际上失败了。这就是为什么当您稍后在对 clCreateCommandQueue
的调用中使用该上下文时,您会收到错误 -34 (CL_INVALID_CONTEXT
).