nvcc 中间链接(Intermediate Link)失败

nvcc Intermediate Link failure

我一直在努力解决使用 CMake 构建 nvcc 项目时遇到的中间链接(intermediate link)错误。我正在升级以前的一个项目以利用 CUDA,并且已经能够从主机代码成功调用该项目库中的函数。但当我尝试从设备代码调用该库的函数时,就会出现中间链接错误。我已经用 __device__ __host__ 限定符标注了所有函数。

附带说明一下,这是一个 ROS 项目,所以我使用了一些 catkin 提供的 CMake 函数。

这是调用主机和设备函数的 ParticleFilter 代码片段:

#include <cuda.h>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include <curand_kernel.h>
#include <iostream>
#include <davinci_kinematics_cuda/davinci_fwd_kinematics.cuh>

/**
 * Host-side constructor of ParticleFilter (excerpt; part of the body is elided).
 *
 * In the lines shown, it creates the forward-kinematics object and prints the
 * translation of the pose computed from the initial joint state.
 *
 * @param numParticles       not referenced in the lines shown (may be used by the elided work)
 * @param initialJointState  joint values; its contiguous storage is handed to fwd_kin_solve()
 * @param threads            not referenced in the lines shown (may be used by the elided work)
 * @param blocks             not referenced in the lines shown (may be used by the elided work)
 */
__host__
ParticleFilter::ParticleFilter(const unsigned int numParticles, const std::vector<double> &initialJointState, const unsigned int threads,
        const unsigned int blocks) {

    /* random other work here */
    
    // This works fine (compiles and runs): both calls below run on the host and
    // resolve to the host definitions that live in the kinematics file.
    kinematics = davinci_kinematics_cuda::Forward();
    std::cout << kinematics.fwd_kin_solve(initialJointState.data()).translation() << std::endl;
}

/**
 * Debug kernel: prints the translation of the forward-kinematics pose of each particle.
 *
 * Threads walk the particle set with a grid-stride loop, so every particle is
 * visited regardless of the launch configuration.
 *
 * @param particles     base pointer of the particle storage; particle n starts
 *                      n * pitch bytes after it
 * @param numParticles  number of particles to visit
 * @param dimensions    not referenced in the lines shown (may be used by the elided work)
 * @param pitch         distance in bytes between the starts of consecutive particles
 */
__global__
void printParticlesKernel(double *particles, const unsigned int numParticles, const unsigned int dimensions, const size_t pitch) {
    // Global thread index: the first particle handled by this thread.
    int locationStart = blockIdx.x * blockDim.x + threadIdx.x;
    // Total number of threads in the grid: the step between particles of one thread.
    int stride = blockDim.x * gridDim.x;

    // This fails, will not link: constructing Forward in device code needs the
    // device definition of Forward::Forward(), which lives in the kinematics file.
    davinci_kinematics_cuda::Forward kinematics = davinci_kinematics_cuda::Forward(); 

    for (int n = locationStart; n < numParticles; n += stride) {
        // Byte-wise addressing: the char* cast makes n * pitch a byte offset.
        double *particle = (double*) ((char*) particles + n * pitch);
        
        /* random other work here */

        // This fails, will not link: device-side call into Forward::fwd_kin_solve(),
        // whose device definition is also in the kinematics file.
        auto translation = kinematics.fwd_kin_solve(particle).translation();
        printf("%f %f %f\n", translation[0], translation[1], translation[2]);
    }
}

这是运动学文件中的代码:

#include <cuda.h>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>

namespace davinci_kinematics_cuda {

    // Member functions that compute and multiply successive transforms.
    // Both definitions below are marked __host__ __device__, i.e. they are
    // compiled for the CPU and for the GPU.

    /**
     * Constructs a Forward object; callable from host and device code.
     * The initialization itself is elided in this excerpt.
     */
    __host__ __device__
    Forward::Forward() {
        /* random initialization here */
    }

    /**
     * Forward-kinematics solve (per the function name; the body is elided in this excerpt).
     *
     * @param q_vec          pointer to the joint values; callers pass the data() of a
     *                       joint-state vector or a pointer to one particle
     * @param desired_joint  NOTE(review): the call sites shown in ParticleFilter omit this
     *                       argument, so the declaration presumably supplies a default —
     *                       confirm in the header
     * @return pose as an Eigen::Affine3d; callers read its translation()
     */
    __host__ __device__
    Eigen::Affine3d Forward::fwd_kin_solve(const double *q_vec, const unsigned int desired_joint) {
        /* other work here */
    }
}

这是 ParticleFilter 文件的相关 CMake 部分。

# NOTE(review): project(... LANGUAGES CUDA ...) relies on CMake's built-in CUDA
# language support, which is much newer than 2.8.10 — the minimum version
# declared here looks too low; confirm and raise it.
cmake_minimum_required(VERSION 2.8.10)
project(tool_tracking LANGUAGES CUDA CXX)

# Legacy FindCUDA module; it provides cuda_add_executable() used at the bottom.
find_package(CUDA REQUIRED) 

# Host compiler flags.
# NOTE(review): CURAND_FLAGS is not set anywhere in this excerpt.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CURAND_FLAGS} -fPIC")
# Separable compilation for FindCUDA targets; this is what adds the
# "NVCC intermediate link" (nvcc -dlink) step seen in the build log.
set(CUDA_SEPARABLE_COMPILATION ON)

# catkin packages this package builds against.
find_package(catkin REQUIRED COMPONENTS
    message_generation
    roscpp
    std_msgs
    sensor_msgs
    geometry_msgs
    cwru_opencv_common
    tool_model
    cwru_davinci_control
    cwru_davinci_kinematics
    xform_utils
    tf
    tool_segmentation
)


# What this package exports to dependent catkin packages.
# NOTE(review): tool_tracking_particle is exported here and linked below, but no
# target with that name is defined in this excerpt.
catkin_package(
    INCLUDE_DIRS
        include
    LIBRARIES 
        tool_tracking_particle
    CATKIN_DEPENDS
        message_runtime
        std_msgs
        sensor_msgs
        geometry_msgs
        cwru_opencv_common
        tool_model
        cwru_davinci_control
        cwru_davinci_kinematics
        xform_utils
        tf
)

# NOTE(review): OpenCV_* variables are used, but no find_package(OpenCV) call is
# visible in this excerpt.
include_directories(SYSTEM ${OpenCV_INCLUDE_DIRS})
include_directories(include ${catkin_INCLUDE_DIRS} tool_model_lib )

# Only the objects of these two .cu files are passed to the device link step
# (see the nvcc -dlink command in the build log); the device code of the
# kinematics library is not among them, hence the undefined references.
cuda_add_executable(test_particlefilter src/ParticleFilter.cu src/Particle.cu)
target_link_libraries(test_particlefilter tool_tracking_particle ${catkin_LIBRARIES} ${OpenCV_LIBRARIES} ${CUDA_LIBRARIES})

这是 CMake 的错误:

/usr/bin/cmake -H/home/ethan/catkin_ws/src/cwru_davinci_tool_tracking/tool_tracking -B/home/ethan/catkin_ws/build/tool_tracking --check-build-system CMakeFiles/Makefile.cmake 0
/usr/bin/cmake -E cmake_progress_start /home/ethan/catkin_ws/build/tool_tracking/CMakeFiles /home/ethan/catkin_ws/build/tool_tracking/CMakeFiles/progress.marks
/usr/bin/make -f CMakeFiles/Makefile2 all
make[1]: Entering directory '/home/ethan/catkin_ws/build/tool_tracking'
/usr/bin/make -f CMakeFiles/test_particlefilter.dir/build.make CMakeFiles/test_particlefilter.dir/depend
make[2]: Entering directory '/home/ethan/catkin_ws/build/tool_tracking'
[ 20%] Building NVCC intermediate link file CMakeFiles/test_particlefilter.dir/test_particlefilter_intermediate_link.o
/usr/local/cuda-11.0/bin/nvcc -lcudadevrt -m64 -ccbin /usr/bin/cc -dlink /home/ethan/catkin_ws/build/tool_tracking/CMakeFiles/test_particlefilter.dir/src/./test_particlefilter_generated_ParticleFilter.cu.o /home/ethan/catkin_ws/build/tool_tracking/CMakeFiles/test_particlefilter.dir/src/./test_particlefilter_generated_Particle.cu.o -o /home/ethan/catkin_ws/build/tool_tracking/CMakeFiles/test_particlefilter.dir/./test_particlefilter_intermediate_link.o -Xcompiler -fPIC
nvlink error   : Undefined reference to '_ZN23davinci_kinematics_cuda7ForwardC1Ev' in '/home/ethan/catkin_ws/build/tool_tracking/CMakeFiles/test_particlefilter.dir/src/./test_particlefilter_generated_ParticleFilter.cu.o'
nvlink error   : Undefined reference to '_ZN23davinci_kinematics_cuda7Forward13fwd_kin_solveEPKdj' in '/home/ethan/catkin_ws/build/tool_tracking/CMakeFiles/test_particlefilter.dir/src/./test_particlefilter_generated_ParticleFilter.cu.o'
CMakeFiles/test_particlefilter.dir/build.make:1468: recipe for target 'CMakeFiles/test_particlefilter.dir/test_particlefilter_intermediate_link.o' failed
make[2]: Leaving directory '/home/ethan/catkin_ws/build/tool_tracking'
make[2]: *** [CMakeFiles/test_particlefilter.dir/test_particlefilter_intermediate_link.o] Error 255
CMakeFiles/Makefile2:67: recipe for target 'CMakeFiles/test_particlefilter.dir/all' failed
make[1]: Leaving directory '/home/ethan/catkin_ws/build/tool_tracking'
make[1]: *** [CMakeFiles/test_particlefilter.dir/all] Error 2
Makefile:140: recipe for target 'all' failed
make: *** [all] Error 2

如何修复这个未定义引用的错误?这看起来是链接错误,但我对编译/链接过程不够熟悉,无法进一步排查。如果需要我贴出运动学软件包的 CMake 文件,我也可以提供。

这是问题的关键,也是对本问题的其他读者最有帮助的部分。Catkin 默认将 CMake 配置为构建共享(shared)库,但 CUDA 可分离编译和 nvlink 仅适用于静态(static)库。您需要将您的 CUDA 库(在您的情况下,即 cwru_davinci_kinematics 中的库)始终设置为静态库。您可以通过在 add_library 调用中添加 STATIC 关键字来实现,例如:

add_library(my_cuda_lib STATIC source1.cu ...)

如果您在 CMake 中“链接”到一个含有 CUDA 代码的共享库,设备链接器会直接忽略它。这实际上是 nvcc 文档中明确记载的行为。参见:https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/#libraries

The device linker has the ability to read the static host library formats (.a on Linux and Mac OS X, .lib on Windows). It ignores any dynamic (.so or .dll) libraries.


这里的另一个重要教训是,把 CMake 的最低版本设置得过旧必然会导致问题。在重现您的问题时,我不得不从源代码构建 OpenCV 3(Ubuntu 20.04 LTS 的软件源中没有它),而在引入了 CMP0074 策略的 CMake 3.12 之前,并没有很好的办法为某个特定的包覆盖搜索路径。

请升级您的最低 CMake 版本。理想情况下,您应当升级到软件源中可用的最新版本,并把文件中的最低版本设置为该版本。与早于约 3.5 的 CMake 版本保持兼容毫无益处,而且我认为这一底线可以提高到 3.16(Ubuntu 20.04 LTS 自带的版本)。由于您使用的是 CUDA,3.18 是最合适的选择。更糟糕的是,您的许多项目把最低版本设置得低于 2.8.12;CMake 很快就会移除对这类旧版本的兼容支持。


以下是我为了使其能在 Ubuntu 20.04 LTS 上构建而做的确切更改。我使用了下面的构建脚本,它放在 ROS 工作区中并从该目录执行:

#!/usr/bin/bash
# Clean-rebuilds the tool_tracking package with catkin, pointing CMake at the
# CUDA compiler and at a locally installed OpenCV. Run from the ROS workspace
# root (it deletes ./build and ./devel and resolves ./opencv-install).

source /opt/ros/noetic/setup.bash

export CUDACXX=/usr/local/cuda/bin/nvcc
# Quoted so that a path containing spaces survives intact.
export OpenCV_ROOT="$(readlink -f opencv-install)"

# Abort with a non-zero status (a bare `exit` would return echo's status, 0)
# and report the problem on stderr.
[ -f "$CUDACXX" ] || { echo "Invalid CUDACXX: $CUDACXX" >&2; exit 1; }
[ -d "$OpenCV_ROOT" ] || { echo "Invalid OpenCV_ROOT: $OpenCV_ROOT" >&2; exit 1; }

rm -rf build devel
# CMP0074=NEW makes find_package() honor the OpenCV_ROOT variable even though
# the projects declare an old minimum CMake version.
# CMAKE_CUDA_ARCHITECTURES=75 selects the GPU architecture to compile for;
# adjust it to match your GPU.
catkin build tool_tracking --cmake-args \
    -Wno-dev \
    -DCMAKE_POLICY_DEFAULT_CMP0074=NEW \
    -DCMAKE_CUDA_ARCHITECTURES=75

目录 opencv-install 是我自己构建 OpenCV 3 时创建的(因为 Ubuntu 20.04 的软件源里只有 v4)。步骤如下:

$ git clone -b 3.4.14 git@github.com:opencv/opencv.git
$ git clone -b 3.4.14 git@github.com:opencv/opencv_contrib.git
$ cmake -G Ninja -S opencv -B opencv-build/ -DOPENCV_EXTRA_MODULES_PATH=$(readlink -f opencv_contrib)/modules -DBUILD_opencv_cnn_3dobj=OFF -DBUILD_opencv_face=OFF -DBUILD_opencv_hdf=OFF -DBUILD_opencv_hfs=OFF -DBUILD_opencv_julia=OFF -DBUILD_opencv_matlab=OFF -DBUILD_opencv_ovis=OFF -DBUILD_opencv_reg=OFF -DBUILD_opencv_sfm=OFF -DBUILD_opencv_text=OFF -DBUILD_opencv_wechat_qrcode=OFF -DBUILD_opencv_ximgproc=OFF
$ cmake --build opencv-build
$ cmake --install opencv-build --prefix opencv-install

这会禁用那些依赖项繁重或与本项目无关的额外模块。

该脚本设置环境变量 OpenCV_ROOT,以便将 CMake 指向本地安装的 OpenCV 版本。由于文件中指定的 CMake 最低版本太低,我还必须设置 CMAKE_POLICY_DEFAULT_CMP0074=NEW,这样 OpenCV_ROOT 才会生效。

以下是我对您的 CMake 代码所做的更改:

src/cwru_davinci_kinematics/CMakeLists.txt

--- a/src/cwru_davinci_kinematics/CMakeLists.txt
+++ b/src/cwru_davinci_kinematics/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 2.8.10)
+cmake_minimum_required(VERSION 3.18)
 project(cwru_davinci_kinematics)
 
 #This is needed as part of the migration to ros jade and later
@@ -26,18 +26,16 @@ find_package(catkin REQUIRED COMPONENTS roscpp roslib roslint tf tf2 tf2_eigen)
 
 SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -std=gnu++0x")
 
-# 
-find_package(CUDA) 
-message(STATUS "CUDA_FOUND=${CUDA_FOUND}")
-if(CUDA_FOUND)
-       message(STATUS "Found CUDA, setting nvcc compilation flags")
-       
-       # set CUDA_NVCC_FLAGS as you would do with CXX/C FLAGS         
-       set(CUDA_NVCC_FLAGS CACHE STRING "nvcc flags" FORCE)
-       set(CUDA_VERBOSE_BUILD ON CACHE BOOL "nvcc verbose" FORCE)
+include(CheckLanguage)
+check_language(CUDA)
+if (CMAKE_CUDA_COMPILER)
+  enable_language(CUDA)
+
        # fPIC fixes some linker issues with nvcc code / objects
-       set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CURAND_FLAGS} -fPIC")
-       set(CUDA_SEPARABLE_COMPILATION ON)
+       set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -fPIC")
+       set(CMAKE_CUDA_SEPARABLE_COMPILATION ON)
+
+  find_package(CUDAToolkit REQUIRED)
 endif()
 
 include_directories(
@@ -48,7 +46,7 @@ include_directories(
   ${YAML_CPP_INCLUDE_DIRS}
 )
 
-if (CUDA_FOUND)
+if (CMAKE_CUDA_COMPILER)
        catkin_package(
                DEPENDS ${Eigen3_DEP}
                LIBRARIES 
@@ -82,14 +80,17 @@ target_link_libraries(davinci_kinematics
   davinci_kinematic_definitions
   )
 
-if (CUDA_FOUND)
-       cuda_add_library(davinci_kinematics_cuda src/davinci_fwd_kinematics.cu)
-       cuda_add_library(davinci_kinematics_definitions_cuda src/davinci_kinematic_definitions.cu)
-       
-       target_link_libraries(davinci_kinematics_cuda
-               ${catkin_LIBRARIES}
-               davinci_kinematics_definitions_cuda
-       )
+if (CMAKE_CUDA_COMPILER)
+  add_library(davinci_kinematics_cuda STATIC src/davinci_fwd_kinematics.cu)
+  add_library(davinci_kinematics_definitions_cuda STATIC src/davinci_kinematic_definitions.cu)
+
+  target_link_libraries(
+    davinci_kinematics_cuda
+    PRIVATE
+      CUDA::curand
+      ${catkin_LIBRARIES}
+      davinci_kinematics_definitions_cuda
+  )
 endif()
 
 # Examples

这里重要的几行是:

  add_library(davinci_kinematics_cuda STATIC src/davinci_fwd_kinematics.cu)
  add_library(davinci_kinematics_definitions_cuda STATIC src/davinci_kinematic_definitions.cu)

我在这里也对 CMake 代码进行了现代化改造,因为内置的 CUDA 语言支持已经相当先进。

src/cwru_davinci_tool_tracking/tool_tracking/CMakeLists.txt

--- a/src/cwru_davinci_tool_tracking/tool_tracking/CMakeLists.txt
+++ b/src/cwru_davinci_tool_tracking/tool_tracking/CMakeLists.txt
@@ -1,18 +1,11 @@
-cmake_minimum_required(VERSION 2.8.10)
-project(tool_tracking LANGUAGES CUDA CXX)
+cmake_minimum_required(VERSION 3.18)
+project(tool_tracking LANGUAGES C CXX CUDA)
 
-# 
-find_package(CUDA REQUIRED) 
+set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -fPIC")
+set(CMAKE_CUDA_SEPARABLE_COMPILATION ON)
 
-# set CUDA_NVCC_FLAGS as you would do with CXX/C FLAGS
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CURAND_FLAGS} -fPIC")
-set(CUDA_SEPARABLE_COMPILATION ON)
+find_package(OpenCV 3 REQUIRED)
 
-#find_package(catkin_simple REQUIRED)
-## Find catkin macros and libraries
-## if COMPONENTS list like find_package(catkin REQUIRED COMPONENTS xyz)
-## is used, also find other catkin packages
-find_package(OpenCV REQUIRED)
 find_package(catkin REQUIRED COMPONENTS
        message_generation
        roscpp
@@ -28,11 +21,12 @@ find_package(catkin REQUIRED COMPONENTS
        tool_segmentation
 )
 
+find_package(CUDAToolkit REQUIRED)
 
 catkin_package(
        INCLUDE_DIRS
                include
-       LIBRARIES 
+       LIBRARIES
                tool_tracking_particle
        CATKIN_DEPENDS
                message_runtime
@@ -47,13 +41,7 @@ catkin_package(
                tf
 )
 
-include_directories(SYSTEM ${OpenCV_INCLUDE_DIRS})
-include_directories(include ${catkin_INCLUDE_DIRS} tool_model_lib )
-
-#cuda_add_library(tool_tracking_particle src/ParticleFilter.cu src/Particle.cu)
-#add_executable(particle src/tracking_particle.cpp)
-#target_link_libraries(particle tool_tracking_particle ${catkin_LIBRARIES} ${OpenCV_LIBRARIES} davinci_kinematics_cuda 
-#                      davinci_kinematics_definitions_cuda)
-
-cuda_add_executable(test_particlefilter src/ParticleFilter.cu src/Particle.cu)
-target_link_libraries(test_particlefilter tool_tracking_particle ${catkin_LIBRARIES} ${OpenCV_LIBRARIES} ${CUDA_LIBRARIES})
+add_executable(test_particlefilter src/ParticleFilter.cu src/Particle.cu)
+target_include_directories(test_particlefilter SYSTEM PRIVATE ${OpenCV_INCLUDE_DIRS} ${catkin_INCLUDE_DIRS})
+target_include_directories(test_particlefilter PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include)
+target_link_libraries(test_particlefilter PRIVATE ${catkin_LIBRARIES} ${OpenCV_LIBRARIES} CUDA::curand)

我在这里也对 CMake 代码进行了现代化改造,因为内置的 CUDA 语言支持已经相当先进。

其他变化

在所有其他地方,我把最低 CMake 版本从 2.8.x 提高到了 3.0.2,以消除警告。我还给所有未指定版本号的 find_package(OpenCV ...) 调用加上了版本号 3。

Boost 不再提供名为 python3 的组件;现在只有 python。我对 src/vision_opencv/cv_bridge/CMakeLists.txt 做了以下更改:

--- a/src/vision_opencv/cv_bridge/CMakeLists.txt
+++ b/src/vision_opencv/cv_bridge/CMakeLists.txt
@@ -1,18 +1,15 @@
-cmake_minimum_required(VERSION 2.8)
+cmake_minimum_required(VERSION 3.0.2)
 project(cv_bridge)
 
 find_package(catkin REQUIRED COMPONENTS rosconsole sensor_msgs)
 
 if(NOT ANDROID)
   find_package(PythonLibs)
-  if(PYTHONLIBS_VERSION_STRING VERSION_LESS 3)
-    find_package(Boost REQUIRED python)
-  else()
-    find_package(Boost REQUIRED python3)
-  endif()
+  find_package(Boost REQUIRED python)
 else()
-find_package(Boost REQUIRED)
+  find_package(Boost REQUIRED)
 endif()
+
 find_package(OpenCV 3 REQUIRED
   COMPONENTS
     opencv_core