使用 cmake 以 cpp 和 cuda 源文件构建 pybind11 模块
building a pybind11 module with cpp and cuda sources using cmake
我正在尝试为一个示例(dummy)类生成 python 绑定,该类需要使用支持 cuda 的编译器进行编译。我使用的是 cmake 3.12.0、pybind11 v2.2.3 和 nvcc 7.5.17。编译失败,因为 -flto
和 -fno-fat-lto-objects
等选项直接传递给 nvcc,而 nvcc 无法识别它们。
这是一个(最小的)例子:
CUDA 代码:
//Adder.hpp
#include <thrust/host_vector.h>
/// Adds two float vectors of length N, either on the host or via thrust on the GPU.
struct Adder {
    // Host-side buffers. Members are initialised in this declaration order.
    thrust::host_vector<float> a_h;  // first operand
    thrust::host_vector<float> b_h;  // second operand
    thrust::host_vector<float> r_h;  // result of a_h + b_h
    int N;                           // element count passed to the constructor

    Adder(int N);

    // Copy N floats from `in` into a_h / b_h respectively.
    void set_a(const float* const in);
    void set_b(const float* const in);

    void calc();      // element-wise sum computed with a host loop
    void calc_gpu();  // element-wise sum computed with thrust device vectors
};
//Adder.cu
#include "Adder.hpp"
#include <thrust/device_vector.h>
// Construct an adder for vectors of length N; each of the three host buffers
// is created with N elements.
// The initialiser list follows the member declaration order (a_h, b_h, r_h, N),
// which is the order C++ actually initialises them in. This silences -Wreorder
// and makes it obvious that the buffers are sized from the parameter N, not
// from the (later-initialised) member N.
Adder::Adder(int N): a_h(N), b_h(N), r_h(N), N(N) {}
// Copy the first N floats pointed to by `in` into the host buffer a_h.
// No bounds information is available here: `in` must reference at least N floats.
void Adder::set_a(float const * const in) {
    int idx = 0;
    while (idx < N) {
        a_h[idx] = in[idx];
        ++idx;
    }
}
// Copy the first N floats pointed to by `in` into the host buffer b_h.
// No bounds information is available here: `in` must reference at least N floats.
void Adder::set_b(float const * const in) {
    int idx = 0;
    while (idx < N) {
        b_h[idx] = in[idx];
        ++idx;
    }
}
// Host implementation: r_h[k] = a_h[k] + b_h[k] for the first N elements.
void Adder::calc() {
    int k = 0;
    while (k < N) {
        const float sum = a_h[k] + b_h[k];
        r_h[k] = sum;
        ++k;
    }
}
// GPU implementation: r_h = a_h + b_h computed with thrust.
// The operands are uploaded by constructing device vectors from the host
// vectors. The result buffer is only *sized* on the device instead of being
// copied from r_h: its previous contents are overwritten by the transform
// (all three buffers are created with the same length in the constructor),
// so uploading them would be a wasted host-to-device transfer.
void Adder::calc_gpu() {
    thrust::device_vector<float> a_d(a_h);
    thrust::device_vector<float> b_d(b_h);
    thrust::device_vector<float> r_d(r_h.size());
    thrust::transform(a_d.begin(), a_d.end(), b_d.begin(), r_d.begin(), thrust::plus<float>());
    r_h = r_d;  // download the result back into the host buffer
}
绑定代码:
#include "Adder.hpp"
#include "lib/include/pybind11/pybind11.h"
#include "lib/include/pybind11/numpy.h"
#include <stdexcept>
namespace py = pybind11;
void bind_Adder(py::module& m) {
py::class_<Adder>(m,"Adder","Module docstring")
.def(py::init<int>(), py::arg("N"), "Init Adder")
.def(
"set_a"
, [](Adder& self, py::array_t<float, py::array::c_style | py::array::forcecast> in) {
py::buffer_info ai = in.request();
if (ai.ndim!=1 || ai.shape[0]!=self.N || ai.strides[0]!=sizeof(float)) {
throw std::runtime_error("Shape of given numpy array must be (N,)! Type must be float.");
}
self.set_a(static_cast<float const * const>(ai.ptr));
}
, py::arg("in")
, "Set a."
)
.def(
"set_b"
, [](Adder& self, py::array_t<float, py::array::c_style | py::array::forcecast> in) {
py::buffer_info ai = in.request();
if (ai.ndim!=1 || ai.shape[0]!=self.N || ai.strides[0]!=sizeof(float))
throw std::runtime_error("Shape of given numpy array must be (N,)! Type must be float.");
self.set_b(static_cast<float const * const>(ai.ptr));
}
, py::arg("in")
, "Set b."
)
.def(
"get_r"
, [](Adder& self, py::array_t<float> x) {
auto r = x.mutable_unchecked<1>();
for (ssize_t i = 0; i < r.shape(0); i++) {
r(i) = self.r_h[i];
}
}
, py::arg("x").noconvert())
.def("calc", &Adder::calc, "Calculate on CPU.")
.def("calc_gpu", &Adder::calc_gpu, "Calculate on GPU.");
}
// Entry point of the Python extension module named "dummy": registers the
// Adder bindings on the module object m.
PYBIND11_MODULE(dummy, m) {
bind_Adder(m);
}
CMakeLists.txt:
# Minimal build: a single pybind11 module made of one C++ and one CUDA source.
cmake_minimum_required(VERSION 3.11)
project(dummy LANGUAGES CXX CUDA)

# C++ standard flag picked up by pybind11's CMake code.
set(PYBIND11_CPP_STANDARD -std=c++11)
add_subdirectory(lib/pybind11)

pybind11_add_module(dummy Adder.pybind.cpp Adder.cu)
例如,运行 cmake ../src && make VERBOSE=1
会失败。 Adder.pybind.cpp
的目标文件已成功生成。 Adder.cu
的编译失败:
/usr/bin/nvcc -Ddummy_EXPORTS -I/home/user/projects/pybind11_cuda_cmake/lib/pybind11/include -I/home/user/.anaconda3/include/python3.6m -Xcompiler=-fPIC -std=c++11 -flto -fno-fat-lto-objects -x cu -c /home/user/projects/pybind11_cuda_cmake/Adder.cu -o CMakeFiles/dummy.dir/Adder.cu.o
nvcc fatal : Unknown option 'flto'
我试图禁用自动传播到 nvcc,但没有成功。
# Attempt to stop host-compiler flags from being forwarded to nvcc.
# NOTE(review): CUDA_PROPAGATE_HOST_FLAGS looks like a FindCUDA-module option, while
# this project enables CUDA via project(... LANGUAGES CUDA) - confirm it applies here.
set(CUDA_PROPAGATE_HOST_FLAGS FALSE)
# NOTE(review): CMake's CUDA flags variable is normally CMAKE_CUDA_FLAGS; CUDAFLAGS is
# presumably only honoured as an environment variable - verify.
set(CUDAFLAGS "-Xcompiler -fPIC -Xcompiler -flto -Xcompiler=-fvisibility=hidden")
有人知道怎么做吗?
更新: 使用 set_target_properties 的(可能)更清晰的版本,从而让 cmake 自行整理出实际的编译器/链接器标志。
cmake_minimum_required(VERSION 3.12)
project(dummy LANGUAGES CXX CUDA)
set(PYBIND11_CPP_STANDARD -std=c++11)
add_subdirectory(lib/pybind11)

# The CUDA source is built as its own static library, configured purely
# through target properties so CMake chooses the matching nvcc flags.
add_library(dummycu STATIC Adder.cu)
set_target_properties(dummycu PROPERTIES
    POSITION_INDEPENDENT_CODE ON
    CUDA_VISIBILITY_PRESET "hidden"
    # CUDA_SEPARABLE_COMPILATION ON
)

# The Python extension module itself only contains the C++ binding code.
add_library(dummy MODULE Adder.pybind.cpp)
set_target_properties(dummy PROPERTIES
    CXX_VISIBILITY_PRESET "hidden"
    INTERPROCEDURAL_OPTIMIZATION TRUE
    PREFIX "${PYTHON_MODULE_PREFIX}"
    SUFFIX "${PYTHON_MODULE_EXTENSION}"
)
target_link_libraries(dummy PRIVATE dummycu pybind11::module)
我们也可以在同一个目标中编译并链接所有内容。
这种做法或许也可以用来修改 pybind11Tools.cmake
中的 pybind11_add_module。
由于 pybind11 无条件地为其 module
目标设置了可见性标志,而这些标志会在没有 -Xcompiler
的情况下直接传给 nvcc,因此如果不想更改 pybind11 的 CMakeLists.txt 或 pybind11Tools.cmake,就需要像下面这样把该标志从 module 目标上移除。为 module
目标和/或 pybind11_add_module
函数编写自定义版本可能会更好。
cmake_minimum_required(VERSION 3.12)
project(dummy LANGUAGES CXX CUDA)
set(PYBIND11_CPP_STANDARD -std=c++11)
add_subdirectory(lib/pybind11)

# pybind11 sets -fvisibility=hidden in INTERFACE_COMPILE_OPTIONS on its module
# target. Strip it there; visibility is requested per language through the
# *_VISIBILITY_PRESET properties below instead.
get_target_property(modifacecopts module INTERFACE_COMPILE_OPTIONS)
# get_target_property yields "modifacecopts-NOTFOUND" when the property is not
# set; only rewrite the property when there is a real option list, otherwise
# that placeholder string would be written back as a compile option.
if(modifacecopts)
    list(REMOVE_ITEM modifacecopts "-fvisibility=hidden")
    set_target_properties(module PROPERTIES INTERFACE_COMPILE_OPTIONS "${modifacecopts}")
endif()

# C++ bindings and CUDA code are compiled and linked into one module target.
add_library(dummy MODULE
    Adder.pybind.cpp
    Adder.cu
)
set_target_properties(dummy PROPERTIES
    POSITION_INDEPENDENT_CODE ON
    CUDA_VISIBILITY_PRESET "hidden"
    CXX_VISIBILITY_PRESET "hidden"
    INTERPROCEDURAL_OPTIMIZATION TRUE
    PREFIX "${PYTHON_MODULE_PREFIX}"
    SUFFIX "${PYTHON_MODULE_EXTENSION}"
)
target_link_libraries(dummy PRIVATE pybind11::module)
您可以使用 -forward-unknown-to-host-compiler
标志,让 nvcc
把它无法识别的标志转发给主机编译器。
'nvcc -forward-unknown-to-host-compiler -foo=bar a.cu' will forward '-foo=bar' to host compiler.
'nvcc -forward-unknown-to-host-compiler -foo bar a.cu' will report an error for 'bar' argument.
'nvcc -forward-unknown-to-host-compiler -foo -bar a.cu' will forward '-foo' and '-bar' to host compiler.
我正在尝试为一个示例(dummy)类生成 python 绑定,该类需要使用支持 cuda 的编译器进行编译。我使用的是 cmake 3.12.0、pybind11 v2.2.3 和 nvcc 7.5.17。编译失败,因为 -flto
和 -fno-fat-lto-objects
等选项直接传递给 nvcc,而 nvcc 无法识别它们。
这是一个(最小的)例子:
CUDA 代码:
//Adder.hpp
#include <thrust/host_vector.h>
/// Adds two float vectors of length N, either on the host or via thrust on the GPU.
struct Adder {
    // Host-side buffers. Members are initialised in this declaration order.
    thrust::host_vector<float> a_h;  // first operand
    thrust::host_vector<float> b_h;  // second operand
    thrust::host_vector<float> r_h;  // result of a_h + b_h
    int N;                           // element count passed to the constructor

    Adder(int N);

    // Copy N floats from `in` into a_h / b_h respectively.
    void set_a(const float* const in);
    void set_b(const float* const in);

    void calc();      // element-wise sum computed with a host loop
    void calc_gpu();  // element-wise sum computed with thrust device vectors
};
//Adder.cu
#include "Adder.hpp"
#include <thrust/device_vector.h>
// Construct an adder for vectors of length N; each of the three host buffers
// is created with N elements.
// The initialiser list follows the member declaration order (a_h, b_h, r_h, N),
// which is the order C++ actually initialises them in. This silences -Wreorder
// and makes it obvious that the buffers are sized from the parameter N, not
// from the (later-initialised) member N.
Adder::Adder(int N): a_h(N), b_h(N), r_h(N), N(N) {}
// Copy the first N floats pointed to by `in` into the host buffer a_h.
// No bounds information is available here: `in` must reference at least N floats.
void Adder::set_a(float const * const in) {
    int idx = 0;
    while (idx < N) {
        a_h[idx] = in[idx];
        ++idx;
    }
}
// Copy the first N floats pointed to by `in` into the host buffer b_h.
// No bounds information is available here: `in` must reference at least N floats.
void Adder::set_b(float const * const in) {
    int idx = 0;
    while (idx < N) {
        b_h[idx] = in[idx];
        ++idx;
    }
}
// Host implementation: r_h[k] = a_h[k] + b_h[k] for the first N elements.
void Adder::calc() {
    int k = 0;
    while (k < N) {
        const float sum = a_h[k] + b_h[k];
        r_h[k] = sum;
        ++k;
    }
}
// GPU implementation: r_h = a_h + b_h computed with thrust.
// The operands are uploaded by constructing device vectors from the host
// vectors. The result buffer is only *sized* on the device instead of being
// copied from r_h: its previous contents are overwritten by the transform
// (all three buffers are created with the same length in the constructor),
// so uploading them would be a wasted host-to-device transfer.
void Adder::calc_gpu() {
    thrust::device_vector<float> a_d(a_h);
    thrust::device_vector<float> b_d(b_h);
    thrust::device_vector<float> r_d(r_h.size());
    thrust::transform(a_d.begin(), a_d.end(), b_d.begin(), r_d.begin(), thrust::plus<float>());
    r_h = r_d;  // download the result back into the host buffer
}
绑定代码:
#include "Adder.hpp"
#include "lib/include/pybind11/pybind11.h"
#include "lib/include/pybind11/numpy.h"
#include <stdexcept>
namespace py = pybind11;
void bind_Adder(py::module& m) {
py::class_<Adder>(m,"Adder","Module docstring")
.def(py::init<int>(), py::arg("N"), "Init Adder")
.def(
"set_a"
, [](Adder& self, py::array_t<float, py::array::c_style | py::array::forcecast> in) {
py::buffer_info ai = in.request();
if (ai.ndim!=1 || ai.shape[0]!=self.N || ai.strides[0]!=sizeof(float)) {
throw std::runtime_error("Shape of given numpy array must be (N,)! Type must be float.");
}
self.set_a(static_cast<float const * const>(ai.ptr));
}
, py::arg("in")
, "Set a."
)
.def(
"set_b"
, [](Adder& self, py::array_t<float, py::array::c_style | py::array::forcecast> in) {
py::buffer_info ai = in.request();
if (ai.ndim!=1 || ai.shape[0]!=self.N || ai.strides[0]!=sizeof(float))
throw std::runtime_error("Shape of given numpy array must be (N,)! Type must be float.");
self.set_b(static_cast<float const * const>(ai.ptr));
}
, py::arg("in")
, "Set b."
)
.def(
"get_r"
, [](Adder& self, py::array_t<float> x) {
auto r = x.mutable_unchecked<1>();
for (ssize_t i = 0; i < r.shape(0); i++) {
r(i) = self.r_h[i];
}
}
, py::arg("x").noconvert())
.def("calc", &Adder::calc, "Calculate on CPU.")
.def("calc_gpu", &Adder::calc_gpu, "Calculate on GPU.");
}
// Entry point of the Python extension module named "dummy": registers the
// Adder bindings on the module object m.
PYBIND11_MODULE(dummy, m) {
bind_Adder(m);
}
CMakeLists.txt:
# Minimal build: a single pybind11 module made of one C++ and one CUDA source.
cmake_minimum_required(VERSION 3.11)
project(dummy LANGUAGES CXX CUDA)

# C++ standard flag picked up by pybind11's CMake code.
set(PYBIND11_CPP_STANDARD -std=c++11)
add_subdirectory(lib/pybind11)

pybind11_add_module(dummy Adder.pybind.cpp Adder.cu)
例如,运行 cmake ../src && make VERBOSE=1
会失败。 Adder.pybind.cpp
的目标文件已成功生成。 Adder.cu
的编译失败:
/usr/bin/nvcc -Ddummy_EXPORTS -I/home/user/projects/pybind11_cuda_cmake/lib/pybind11/include -I/home/user/.anaconda3/include/python3.6m -Xcompiler=-fPIC -std=c++11 -flto -fno-fat-lto-objects -x cu -c /home/user/projects/pybind11_cuda_cmake/Adder.cu -o CMakeFiles/dummy.dir/Adder.cu.o
nvcc fatal : Unknown option 'flto'
我试图禁用自动传播到 nvcc,但没有成功。
# Attempt to stop host-compiler flags from being forwarded to nvcc.
# NOTE(review): CUDA_PROPAGATE_HOST_FLAGS looks like a FindCUDA-module option, while
# this project enables CUDA via project(... LANGUAGES CUDA) - confirm it applies here.
set(CUDA_PROPAGATE_HOST_FLAGS FALSE)
# NOTE(review): CMake's CUDA flags variable is normally CMAKE_CUDA_FLAGS; CUDAFLAGS is
# presumably only honoured as an environment variable - verify.
set(CUDAFLAGS "-Xcompiler -fPIC -Xcompiler -flto -Xcompiler=-fvisibility=hidden")
有人知道怎么做吗?
更新: 使用 set_target_properties 的(可能)更清晰的版本,从而让 cmake 自行整理出实际的编译器/链接器标志。
cmake_minimum_required(VERSION 3.12)
project(dummy LANGUAGES CXX CUDA)
set(PYBIND11_CPP_STANDARD -std=c++11)
add_subdirectory(lib/pybind11)

# The CUDA source is built as its own static library, configured purely
# through target properties so CMake chooses the matching nvcc flags.
add_library(dummycu STATIC Adder.cu)
set_target_properties(dummycu PROPERTIES
    POSITION_INDEPENDENT_CODE ON
    CUDA_VISIBILITY_PRESET "hidden"
    # CUDA_SEPARABLE_COMPILATION ON
)

# The Python extension module itself only contains the C++ binding code.
add_library(dummy MODULE Adder.pybind.cpp)
set_target_properties(dummy PROPERTIES
    CXX_VISIBILITY_PRESET "hidden"
    INTERPROCEDURAL_OPTIMIZATION TRUE
    PREFIX "${PYTHON_MODULE_PREFIX}"
    SUFFIX "${PYTHON_MODULE_EXTENSION}"
)
target_link_libraries(dummy PRIVATE dummycu pybind11::module)
我们也可以在同一个目标中编译并链接所有内容。
这种做法或许也可以用来修改 pybind11Tools.cmake
中的 pybind11_add_module。
由于 pybind11 无条件地为其 module
目标设置了可见性标志,而这些标志会在没有 -Xcompiler
的情况下直接传给 nvcc,因此如果不想更改 pybind11 的 CMakeLists.txt 或 pybind11Tools.cmake,就需要像下面这样把该标志从 module 目标上移除。为 module
目标和/或 pybind11_add_module
函数编写自定义版本可能会更好。
cmake_minimum_required(VERSION 3.12)
project(dummy LANGUAGES CXX CUDA)
set(PYBIND11_CPP_STANDARD -std=c++11)
add_subdirectory(lib/pybind11)

# pybind11 sets -fvisibility=hidden in INTERFACE_COMPILE_OPTIONS on its module
# target. Strip it there; visibility is requested per language through the
# *_VISIBILITY_PRESET properties below instead.
get_target_property(modifacecopts module INTERFACE_COMPILE_OPTIONS)
# get_target_property yields "modifacecopts-NOTFOUND" when the property is not
# set; only rewrite the property when there is a real option list, otherwise
# that placeholder string would be written back as a compile option.
if(modifacecopts)
    list(REMOVE_ITEM modifacecopts "-fvisibility=hidden")
    set_target_properties(module PROPERTIES INTERFACE_COMPILE_OPTIONS "${modifacecopts}")
endif()

# C++ bindings and CUDA code are compiled and linked into one module target.
add_library(dummy MODULE
    Adder.pybind.cpp
    Adder.cu
)
set_target_properties(dummy PROPERTIES
    POSITION_INDEPENDENT_CODE ON
    CUDA_VISIBILITY_PRESET "hidden"
    CXX_VISIBILITY_PRESET "hidden"
    INTERPROCEDURAL_OPTIMIZATION TRUE
    PREFIX "${PYTHON_MODULE_PREFIX}"
    SUFFIX "${PYTHON_MODULE_EXTENSION}"
)
target_link_libraries(dummy PRIVATE pybind11::module)
您可以使用 -forward-unknown-to-host-compiler
标志,让 nvcc
把它无法识别的标志转发给主机编译器。
'nvcc -forward-unknown-to-host-compiler -foo=bar a.cu' will forward '-foo=bar' to host compiler.
'nvcc -forward-unknown-to-host-compiler -foo bar a.cu' will report an error for 'bar' argument.
'nvcc -forward-unknown-to-host-compiler -foo -bar a.cu' will forward '-foo' and '-bar' to host compiler.