使用 CUDA 将 CMakeList.txt 调整为 运行
Adapting CMakeList.txt to run with CUDA
我正在使用 slam 系统,我已经安装了 dso,代码可以在这里看到::
https://github.com/JakobEngel/dso
一切正常,我设法编译并且 运行 没有错误。但是知道我想使用 CUDA 并行化代码。为了能够使用 CUDA,我在调整 CMakeLists.txt 时遇到了很多麻烦。来自 dso 的原始 CMakeLists 可在此处获得:
我正在尝试根据我对另一个作者在另一个 SLAM 系统上的实现所做的更改进行调整:
ORB SLAM 2 CMakeLists.txt using CUDA
现在我的 CMakeLists 是这样的:
SET(PROJECT_NAME DSO)
PROJECT(${PROJECT_NAME})
CMAKE_MINIMUM_REQUIRED(VERSION 2.6)
#set(CMAKE_VERBOSE_MAKEFILE ON)
set(BUILD_TYPE Release)
#set(BUILD_TYPE RelWithDebInfo)
set(EXECUTABLE_OUTPUT_PATH bin)
set(LIBRARY_OUTPUT_PATH lib)
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
# required libraries
#SET(CMAKE_INCLUDE_PATH ${CMAKE_INCLUDE_PATH} "/usr/include")
find_package(SuiteParse REQUIRED)
find_package(Eigen3 REQUIRED)
find_package(Boost)
# optional libraries
find_package(LibZip QUIET)
find_package(Pangolin 0.2 QUIET)
find_package(OpenCV QUIET)
#find_package(OpenACC)
# flags
add_definitions("-DENABLE_SSE")
set(CMAKE_CXX_FLAGS
"${SSE_FLAGS} -O3 -g -std=c++11"
)
set(CMAKE_C_FLAGS
"${SSE_FLAGS} -O3 -g -std=c++11"
)
#LIST(APPEND CMAKE_C_FLAGS "-Wall -Wextra -DUSE_NVTX") <<<< Error: doesn't recognize -Wall -Wextra
#LIST(APPEND CMAKE_CXX_FLAGS "-Wall -Wextra -DUSE_NVTX") << Error: doesn't recognize -Wall -Wextra
find_package(CUDA REQUIRED)
set(CUDA_PROPAGATE_HOST_FLAGS OFF)
SET(CUDA_HOST_COMPILER /usr/bin/g++)
LIST(APPEND CUDA_NVCC_FLAGS "--compiler-options -fno-strict-aliasing -use_fast_math -ccbin gcc-5")
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -std=c++11")
if (MSVC)
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc")
endif (MSVC)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY lib)
cuda_include_directories(
${CUDA_TOOLKIT_ROOT_DIR}/samples/common/inc
)
# Sources files
set(dso_SOURCE_FILES
${PROJECT_SOURCE_DIR}/src/FullSystem/FullSystem.cpp
${PROJECT_SOURCE_DIR}/src/FullSystem/FullSystemOptimize.cpp
${PROJECT_SOURCE_DIR}/src/FullSystem/FullSystemOptPoint.cpp
${PROJECT_SOURCE_DIR}/src/FullSystem/FullSystemDebugStuff.cpp
${PROJECT_SOURCE_DIR}/src/FullSystem/FullSystemMarginalize.cpp
${PROJECT_SOURCE_DIR}/src/FullSystem/Residuals.cpp
${PROJECT_SOURCE_DIR}/src/FullSystem/CoarseTracker.cpp
${PROJECT_SOURCE_DIR}/src/FullSystem/CoarseInitializer.cpp
${PROJECT_SOURCE_DIR}/src/FullSystem/ImmaturePoint.cpp
${PROJECT_SOURCE_DIR}/src/FullSystem/HessianBlocks.cpp
${PROJECT_SOURCE_DIR}/src/FullSystem/PixelSelector2.cpp
${PROJECT_SOURCE_DIR}/src/OptimizationBackend/EnergyFunctional.cpp
${PROJECT_SOURCE_DIR}/src/OptimizationBackend/AccumulatedTopHessian.cpp
${PROJECT_SOURCE_DIR}/src/OptimizationBackend/AccumulatedSCHessian.cpp
${PROJECT_SOURCE_DIR}/src/OptimizationBackend/EnergyFunctionalStructs.cpp
${PROJECT_SOURCE_DIR}/src/util/settings.cpp
${PROJECT_SOURCE_DIR}/src/util/Undistort.cpp
${PROJECT_SOURCE_DIR}/src/util/globalCalib.cpp
)
include_directories(
${PROJECT_SOURCE_DIR}/src
${PROJECT_SOURCE_DIR}/thirdparty/Sophus
${PROJECT_SOURCE_DIR}/thirdparty/sse2neon
${EIGEN3_INCLUDE_DIR}
)
# decide if we have pangolin
if (Pangolin_FOUND)
message("--- found PANGOLIN, compiling dso_pangolin library.")
include_directories( ${Pangolin_INCLUDE_DIRS} )
set(dso_pangolin_SOURCE_FILES
${PROJECT_SOURCE_DIR}/src/IOWrapper/Pangolin/KeyFrameDisplay.cpp
${PROJECT_SOURCE_DIR}/src/IOWrapper/Pangolin/PangolinDSOViewer.cpp)
set(HAS_PANGOLIN 1)
else ()
message("--- could not find PANGOLIN, not compiling dso_pangolin library.")
message(" this means there will be no 3D display / GUI available for dso_dataset.")
set(dso_pangolin_SOURCE_FILES )
set(HAS_PANGOLIN 0)
endif ()
# decide if we have openCV
if (OpenCV_FOUND)
message("--- found OpenCV, compiling dso_opencv library.")
include_directories( ${OpenCV_INCLUDE_DIRS} )
set(dso_opencv_SOURCE_FILES
${PROJECT_SOURCE_DIR}/src/IOWrapper/OpenCV/ImageDisplay_OpenCV.cpp
${PROJECT_SOURCE_DIR}/src/IOWrapper/OpenCV/ImageRW_OpenCV.cpp)
set(HAS_OPENCV 1)
else ()
message("--- could not find OpenCV, not compiling dso_opencv library.")
message(" this means there will be no image display, and image read / load functionality.")
set(dso_opencv_SOURCE_FILES
${PROJECT_SOURCE_DIR}/src/IOWrapper/ImageDisplay_dummy.cpp
${PROJECT_SOURCE_DIR}/src/IOWrapper/ImageRW_dummy.cpp)
set(HAS_OPENCV 0)
endif ()
# decide if we have ziplib.
if (LIBZIP_LIBRARY)
message("--- found ziplib (${LIBZIP_VERSION}), compiling with zip capability.")
add_definitions(-DHAS_ZIPLIB=1)
include_directories( ${LIBZIP_INCLUDE_DIR_ZIP} ${LIBZIP_INCLUDE_DIR_ZIPCONF} )
else()
message("--- not found ziplib (${LIBZIP_LIBRARY}), compiling without zip capability.")
set(LIBZIP_LIBRARY "")
endif()
# compile main library.
include_directories( ${CSPARSE_INCLUDE_DIR} ${CHOLMOD_INCLUDE_DIR})
cuda_add_library(dso SHARED ${dso_SOURCE_FILES} ${dso_opencv_SOURCE_FILES} ${dso_pangolin_SOURCE_FILES}
${PROJECT_SOURCE_DIR}/src/teste.cu
)
#set_property( TARGET dso APPEND_STRING PROPERTY COMPILE_FLAGS -Wall )
if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin") # OSX
set(BOOST_THREAD_LIBRARY boost_thread-mt)
else()
set(BOOST_THREAD_LIBRARY boost_thread)
endif()
# build main executable (only if we have both OpenCV and Pangolin)
if (OpenCV_FOUND AND Pangolin_FOUND)
message("--- compiling dso_dataset.")
add_executable(dso_dataset ${PROJECT_SOURCE_DIR}/src/main_dso_pangolin.cpp)
target_link_libraries(dso_dataset dso boost_system cxsparse ${BOOST_THREAD_LIBRARY} ${LIBZIP_LIBRARY} ${Pangolin_LIBRARIES} ${OpenCV_LIBS})
else()
message("--- not building dso_dataset, since either don't have openCV or Pangolin.")
endif()
unset(CMAKE_RUNTIME_OUTPUT_DIRECTORY)
所以,'main_dso_pangolin.cpp' 是我的主要文件。此时,只需更改代码即可编译。但是我想尝试一下我是否能够编写一些 CUDA 代码。为此,我创建了一个 'teste.cu' 文件,该文件与其中一个 cuda 示例具有相同的代码,如下所示:
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
// CUDA runtime
#include </usr/local/cuda-9.0/include/cuda_runtime.h>
#include <cuda.h>
// helper functions and utilities to work with CUDA
#include </usr/local/cuda-9.0/samples/common/inc/helper_functions.h>
#include </usr/local/cuda-9.0/samples/common/inc/helper_cuda.h>
__global__ static void timedReduction(const float *input, float *output, clock_t *timer)
{
// __shared__ float shared[2 * blockDim.x];
extern __shared__ float shared[];
const int tid = threadIdx.x;
const int bid = blockIdx.x;
if (tid == 0) timer[bid] = clock();
// Copy input.
shared[tid] = input[tid];
shared[tid + blockDim.x] = input[tid + blockDim.x];
// Perform reduction to find minimum.
for (int d = blockDim.x; d > 0; d /= 2)
{
__syncthreads();
if (tid < d)
{
float f0 = shared[tid];
float f1 = shared[tid + d];
if (f1 < f0)
{
shared[tid] = f1;
}
}
}
// Write result.
if (tid == 0) output[bid] = shared[0];
__syncthreads();
if (tid == 0) timer[bid+gridDim.x] = clock();
}
#define NUM_BLOCKS 64
#define NUM_THREADS 256
void xx(int argc, char** argv){
printf("CUDA Clock sample\n");
// This will pick the best possible CUDA capable device
int dev = findCudaDevice(argc, (const char **)argv);
float *dinput = NULL;
float *doutput = NULL;
clock_t *dtimer = NULL;
clock_t timer[NUM_BLOCKS * 2];
float input[NUM_THREADS * 2];
for (int i = 0; i < NUM_THREADS * 2; i++)
{
input[i] = (float)i;
}
checkCudaErrors(cudaMalloc((void **)&dinput, sizeof(float) * NUM_THREADS * 2));
checkCudaErrors(cudaMalloc((void **)&doutput, sizeof(float) * NUM_BLOCKS));
checkCudaErrors(cudaMalloc((void **)&dtimer, sizeof(clock_t) * NUM_BLOCKS * 2));
checkCudaErrors(cudaMemcpy(dinput, input, sizeof(float) * NUM_THREADS * 2, cudaMemcpyHostToDevice));
timedReduction<<<NUM_BLOCKS, NUM_THREADS, sizeof(float) * 2 *NUM_THREADS>>>(dinput, doutput, dtimer);
checkCudaErrors(cudaMemcpy(timer, dtimer, sizeof(clock_t) * NUM_BLOCKS * 2, cudaMemcpyDeviceToHost));
checkCudaErrors(cudaFree(dinput));
checkCudaErrors(cudaFree(doutput));
checkCudaErrors(cudaFree(dtimer));
long double avgElapsedClocks = 0;
for (int i = 0; i < NUM_BLOCKS; i++)
{
avgElapsedClocks += (long double) (timer[i + NUM_BLOCKS] - timer[i]);
}
avgElapsedClocks = avgElapsedClocks/NUM_BLOCKS;
printf("Average clocks/block = %Lf\n", avgElapsedClocks);
}
在我的主要工作中,我做的第一件事就是调用这个函数。这一次,当我做 'cmake' 和 'make 我得到这样的错误:
/home/cesar/Documents/dso/src/teste.cu:18:21: error: ‘threadIdx’ was not declared in this scope
const int tid = threadIdx.x;
/home/cesar/Documents/dso/src/teste.cu:19:21: error: ‘blockIdx’ was not declared in this scope
const int bid = blockIdx.x;
我已经正确安装了 CUDA 工具包,但这里是版本:
cesar@cesar-X550JX:/usr/local/cuda/bin$ /usr/local/cuda/bin/nvcc --version
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2017 NVIDIA Corporation
Built on Fri_Sep__1_21:08:03_CDT_2017
Cuda compilation tools, release 9.0, V9.0.176
你认为我做错了什么或者我错过了什么?由于其复杂性和定义明确的结构,我在适应 CMakeLists.txt 时遇到很多困难。
--- 编辑 ---
运行 make -j VERBOSE=1 我收到这条消息,告诉我正在使用常规的 c++ 编译器:
/usr/bin/c++ -fPIC -O3 -g -std=c++11 -D_FORCE_INLINES -shared -Wl,-soname,libdso.so -o lib/libdso.so CMakeFiles/dso.dir/src/FullSystem/FullSystem.cpp.o CMakeFiles/dso.dir/src/FullSystem/FullSystemOptimize.cpp.o CMakeFiles/dso.dir/src/FullSystem/FullSystemOptPoint.cpp.o CMakeFiles/dso.dir/src/FullSystem/FullSystemDebugStuff.cpp.o CMakeFiles/dso.dir/src/FullSystem/FullSystemMarginalize.cpp.o CMakeFiles/dso.dir/src/FullSystem/Residuals.cpp.o CMakeFiles/dso.dir/src/FullSystem/CoarseTracker.cpp.o CMakeFiles/dso.dir/src/FullSystem/CoarseInitializer.cpp.o CMakeFiles/dso.dir/src/FullSystem/ImmaturePoint.cpp.o CMakeFiles/dso.dir/src/FullSystem/HessianBlocks.cpp.o CMakeFiles/dso.dir/src/FullSystem/PixelSelector2.cpp.o CMakeFiles/dso.dir/src/OptimizationBackend/EnergyFunctional.cpp.o CMakeFiles/dso.dir/src/OptimizationBackend/AccumulatedTopHessian.cpp.o CMakeFiles/dso.dir/src/OptimizationBackend/AccumulatedSCHessian.cpp.o CMakeFiles/dso.dir/src/OptimizationBackend/EnergyFunctionalStructs.cpp.o CMakeFiles/dso.dir/src/util/settings.cpp.o CMakeFiles/dso.dir/src/util/Undistort.cpp.o CMakeFiles/dso.dir/src/util/globalCalib.cpp.o CMakeFiles/dso.dir/src/IOWrapper/OpenCV/ImageDisplay_OpenCV.cpp.o CMakeFiles/dso.dir/src/IOWrapper/OpenCV/ImageRW_OpenCV.cpp.o CMakeFiles/dso.dir/src/IOWrapper/Pangolin/KeyFrameDisplay.cpp.o CMakeFiles/dso.dir/src/IOWrapper/Pangolin/PangolinDSOViewer.cpp.o CMakeFiles/dso.dir/src/dso_generated_teste.cu.o /usr/local/cuda/lib64/libcudart_static.a -lpthread -ldl -lrt
[ 96%] Building CXX object CMakeFiles/dso_dataset.dir/src/main_dso_pangolin.cpp.o
/usr/bin/c++ -DENABLE_SSE -DHAS_ZIPLIB=1 -I/usr/include/opencv -I/home/cesar/Documents/dso/src -I/home/cesar/Documents/dso/thirdparty/Sophus -I/home/cesar/Documents/dso/thirdparty/sse2neon -I/usr/include/eigen3 -I/home/cesar/Documents/Pangolin/include -I/home/cesar/Documents/Pangolin/build/src/include -I/usr/local/include -I/usr/include/suitesparse -I/usr/local/cuda/include -O3 -g -std=c++11 -D_FORCE_INLINES -o CMakeFiles/dso_dataset.dir/src/main_dso_pangolin.cpp.o -c /home/cesar/Documents/dso/src/main_dso_pangolin.cpp
我还尝试将 .cpp 文件与 .cu 文件分开,对 .cpp 使用 add_library,对 .cu 文件使用 cuda_add_library,如下所示:
add_library(dso ${dso_SOURCE_FILES} ${dso_opencv_SOURCE_FILES} ${dso_pangolin_SOURCE_FILES})
cuda_add_library(my_cuda_lib ${PROJECT_SOURCE_DIR}/src/teste.cu)
然后在target_link_libraries中使用my_cuda_lib,像这样:
target_link_libraries(dso_dataset dso boost_system cxsparse ${BOOST_THREAD_LIBRARY} ${LIBZIP_LIBRARY} ${Pangolin_LIBRARIES} ${OpenCV_LIBS} ${CUDA_LIBRARIES} my_cuda_lib)
但还是出现同样的错误。
-- 编辑:MCVE ---
为了证明我的错误,我创建了一个简单的例子。我有 2 个简单文件,我的主要文件是 .cpp,我的 cuda 文件是 .cu。我的主要只是调用另一个文件上的函数,如下所示:
#include <iostream>
#include "hello_world.cu"
using namespace std;
int main()
{
teste();
return 0;
}
我的 .cu 文件如下所示:
#include <stdio.h>
#include <iostream>
// CUDA runtime
#include </usr/local/cuda-9.0/include/cuda_runtime.h>
// helper functions and utilities to work with CUDA
#include </usr/local/cuda-9.0/samples/common/inc/helper_functions.h>
#include </usr/local/cuda-9.0/samples/common/inc/helper_cuda.h>
__global__ void kernel (void){
extern __shared__ float shared[];
const int tid = threadIdx.x;
const int bid = blockIdx.x;
}
int teste( void ) {
kernel<<<1,1>>>();
printf( "Hello, World!\n" );
return 0;
}
我编译的 CMakeLists.txt 看起来像这样:
cmake_minimum_required(VERSION 2.8)
set(CUDA_HOST_COMPILER /usr/bin/g++-5)
find_package(CUDA QUIET REQUIRED)
# Pass options to NVCC
set(
CUDA_NVCC_FLAGS
${CUDA_NVCC_FLAGS};
-O3
)
# For compilation ...
# Specify target & source files to compile it from
cuda_add_executable(
helloworld
hello_world.cu
teste.cpp
)
制作 cmake 和 运行 "cmake --build ." 之后(我不知道为什么必须是这个命令,通常我只是做 make -j,但在这个例子中只有这个有效)我得到与我的项目相同的错误,“threadIdx”未在此范围内声明,与 'blockIdx' 等相同。
既然您在主代码中包含了 hello_world.cu 文件,那么您希望使用 nvcc 编译器对其进行编译。要实现这一点,请将 teste.cpp 文件的名称更改为 teste.cu(否则将使用 g++)。
同时从 CMakeLists.txt 中删除 'hello_world.cu'(它已经包含在 teste 文件中)以得到如下内容:
cuda_add_executable(
helloworld
teste.cu
)
那么应该可以了。
-- 编辑:附加问题--
如果您想保留您的 .cpp 文件,那么您需要将 g++ 可以为您做的和 nvcc 应该做的分开。所以你可以在你的项目中引入额外的 hello_world.h 文件:
#ifndef HELLO_WORLD_H
#define HELLO_WORLD_H
int teste();
#endif
将其包含在您的 teste.cpp 中:
#include <iostream>
#include "hello_world.h"
using namespace std;
int main()
{
teste();
return 0;
}
然后你的 CMakeLists.txt 看起来像你原来的例子:
...
cuda_add_executable(
helloworld
teste.cpp
hello_world.cu
)
在这种情况下 hello_world.cu 将使用 nvcc 编译,然后 teste.cpp 的编译和链接将由 g++ 完成(在这种情况下这是可能的,因为没有 CUDA 代码在 teste.cpp).
我正在使用 slam 系统,我已经安装了 dso,代码可以在这里看到::
https://github.com/JakobEngel/dso
一切正常,我设法编译并且 运行 没有错误。但是知道我想使用 CUDA 并行化代码。为了能够使用 CUDA,我在调整 CMakeLists.txt 时遇到了很多麻烦。来自 dso 的原始 CMakeLists 可在此处获得:
我正在尝试根据我对另一个作者在另一个 SLAM 系统上的实现所做的更改进行调整:
ORB SLAM 2 CMakeLists.txt using CUDA
现在我的 CMakeLists 是这样的:
SET(PROJECT_NAME DSO)
PROJECT(${PROJECT_NAME})
CMAKE_MINIMUM_REQUIRED(VERSION 2.6)
#set(CMAKE_VERBOSE_MAKEFILE ON)
set(BUILD_TYPE Release)
#set(BUILD_TYPE RelWithDebInfo)
set(EXECUTABLE_OUTPUT_PATH bin)
set(LIBRARY_OUTPUT_PATH lib)
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
# required libraries
#SET(CMAKE_INCLUDE_PATH ${CMAKE_INCLUDE_PATH} "/usr/include")
find_package(SuiteParse REQUIRED)
find_package(Eigen3 REQUIRED)
find_package(Boost)
# optional libraries
find_package(LibZip QUIET)
find_package(Pangolin 0.2 QUIET)
find_package(OpenCV QUIET)
#find_package(OpenACC)
# flags
add_definitions("-DENABLE_SSE")
set(CMAKE_CXX_FLAGS
"${SSE_FLAGS} -O3 -g -std=c++11"
)
set(CMAKE_C_FLAGS
"${SSE_FLAGS} -O3 -g -std=c++11"
)
#LIST(APPEND CMAKE_C_FLAGS "-Wall -Wextra -DUSE_NVTX") <<<< Error: doesn't recognize -Wall -Wextra
#LIST(APPEND CMAKE_CXX_FLAGS "-Wall -Wextra -DUSE_NVTX") << Error: doesn't recognize -Wall -Wextra
find_package(CUDA REQUIRED)
set(CUDA_PROPAGATE_HOST_FLAGS OFF)
SET(CUDA_HOST_COMPILER /usr/bin/g++)
LIST(APPEND CUDA_NVCC_FLAGS "--compiler-options -fno-strict-aliasing -use_fast_math -ccbin gcc-5")
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -std=c++11")
if (MSVC)
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc")
endif (MSVC)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY lib)
cuda_include_directories(
${CUDA_TOOLKIT_ROOT_DIR}/samples/common/inc
)
# Sources files
set(dso_SOURCE_FILES
${PROJECT_SOURCE_DIR}/src/FullSystem/FullSystem.cpp
${PROJECT_SOURCE_DIR}/src/FullSystem/FullSystemOptimize.cpp
${PROJECT_SOURCE_DIR}/src/FullSystem/FullSystemOptPoint.cpp
${PROJECT_SOURCE_DIR}/src/FullSystem/FullSystemDebugStuff.cpp
${PROJECT_SOURCE_DIR}/src/FullSystem/FullSystemMarginalize.cpp
${PROJECT_SOURCE_DIR}/src/FullSystem/Residuals.cpp
${PROJECT_SOURCE_DIR}/src/FullSystem/CoarseTracker.cpp
${PROJECT_SOURCE_DIR}/src/FullSystem/CoarseInitializer.cpp
${PROJECT_SOURCE_DIR}/src/FullSystem/ImmaturePoint.cpp
${PROJECT_SOURCE_DIR}/src/FullSystem/HessianBlocks.cpp
${PROJECT_SOURCE_DIR}/src/FullSystem/PixelSelector2.cpp
${PROJECT_SOURCE_DIR}/src/OptimizationBackend/EnergyFunctional.cpp
${PROJECT_SOURCE_DIR}/src/OptimizationBackend/AccumulatedTopHessian.cpp
${PROJECT_SOURCE_DIR}/src/OptimizationBackend/AccumulatedSCHessian.cpp
${PROJECT_SOURCE_DIR}/src/OptimizationBackend/EnergyFunctionalStructs.cpp
${PROJECT_SOURCE_DIR}/src/util/settings.cpp
${PROJECT_SOURCE_DIR}/src/util/Undistort.cpp
${PROJECT_SOURCE_DIR}/src/util/globalCalib.cpp
)
include_directories(
${PROJECT_SOURCE_DIR}/src
${PROJECT_SOURCE_DIR}/thirdparty/Sophus
${PROJECT_SOURCE_DIR}/thirdparty/sse2neon
${EIGEN3_INCLUDE_DIR}
)
# decide if we have pangolin
if (Pangolin_FOUND)
message("--- found PANGOLIN, compiling dso_pangolin library.")
include_directories( ${Pangolin_INCLUDE_DIRS} )
set(dso_pangolin_SOURCE_FILES
${PROJECT_SOURCE_DIR}/src/IOWrapper/Pangolin/KeyFrameDisplay.cpp
${PROJECT_SOURCE_DIR}/src/IOWrapper/Pangolin/PangolinDSOViewer.cpp)
set(HAS_PANGOLIN 1)
else ()
message("--- could not find PANGOLIN, not compiling dso_pangolin library.")
message(" this means there will be no 3D display / GUI available for dso_dataset.")
set(dso_pangolin_SOURCE_FILES )
set(HAS_PANGOLIN 0)
endif ()
# decide if we have openCV
if (OpenCV_FOUND)
message("--- found OpenCV, compiling dso_opencv library.")
include_directories( ${OpenCV_INCLUDE_DIRS} )
set(dso_opencv_SOURCE_FILES
${PROJECT_SOURCE_DIR}/src/IOWrapper/OpenCV/ImageDisplay_OpenCV.cpp
${PROJECT_SOURCE_DIR}/src/IOWrapper/OpenCV/ImageRW_OpenCV.cpp)
set(HAS_OPENCV 1)
else ()
message("--- could not find OpenCV, not compiling dso_opencv library.")
message(" this means there will be no image display, and image read / load functionality.")
set(dso_opencv_SOURCE_FILES
${PROJECT_SOURCE_DIR}/src/IOWrapper/ImageDisplay_dummy.cpp
${PROJECT_SOURCE_DIR}/src/IOWrapper/ImageRW_dummy.cpp)
set(HAS_OPENCV 0)
endif ()
# decide if we have ziplib.
if (LIBZIP_LIBRARY)
message("--- found ziplib (${LIBZIP_VERSION}), compiling with zip capability.")
add_definitions(-DHAS_ZIPLIB=1)
include_directories( ${LIBZIP_INCLUDE_DIR_ZIP} ${LIBZIP_INCLUDE_DIR_ZIPCONF} )
else()
message("--- not found ziplib (${LIBZIP_LIBRARY}), compiling without zip capability.")
set(LIBZIP_LIBRARY "")
endif()
# compile main library.
include_directories( ${CSPARSE_INCLUDE_DIR} ${CHOLMOD_INCLUDE_DIR})
cuda_add_library(dso SHARED ${dso_SOURCE_FILES} ${dso_opencv_SOURCE_FILES} ${dso_pangolin_SOURCE_FILES}
${PROJECT_SOURCE_DIR}/src/teste.cu
)
#set_property( TARGET dso APPEND_STRING PROPERTY COMPILE_FLAGS -Wall )
if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin") # OSX
set(BOOST_THREAD_LIBRARY boost_thread-mt)
else()
set(BOOST_THREAD_LIBRARY boost_thread)
endif()
# build main executable (only if we have both OpenCV and Pangolin)
if (OpenCV_FOUND AND Pangolin_FOUND)
message("--- compiling dso_dataset.")
add_executable(dso_dataset ${PROJECT_SOURCE_DIR}/src/main_dso_pangolin.cpp)
target_link_libraries(dso_dataset dso boost_system cxsparse ${BOOST_THREAD_LIBRARY} ${LIBZIP_LIBRARY} ${Pangolin_LIBRARIES} ${OpenCV_LIBS})
else()
message("--- not building dso_dataset, since either don't have openCV or Pangolin.")
endif()
unset(CMAKE_RUNTIME_OUTPUT_DIRECTORY)
所以,'main_dso_pangolin.cpp' 是我的主要文件。此时,只需更改代码即可编译。但是我想尝试一下我是否能够编写一些 CUDA 代码。为此,我创建了一个 'teste.cu' 文件,该文件与其中一个 cuda 示例具有相同的代码,如下所示:
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
// CUDA runtime
#include </usr/local/cuda-9.0/include/cuda_runtime.h>
#include <cuda.h>
// helper functions and utilities to work with CUDA
#include </usr/local/cuda-9.0/samples/common/inc/helper_functions.h>
#include </usr/local/cuda-9.0/samples/common/inc/helper_cuda.h>
__global__ static void timedReduction(const float *input, float *output, clock_t *timer)
{
// __shared__ float shared[2 * blockDim.x];
extern __shared__ float shared[];
const int tid = threadIdx.x;
const int bid = blockIdx.x;
if (tid == 0) timer[bid] = clock();
// Copy input.
shared[tid] = input[tid];
shared[tid + blockDim.x] = input[tid + blockDim.x];
// Perform reduction to find minimum.
for (int d = blockDim.x; d > 0; d /= 2)
{
__syncthreads();
if (tid < d)
{
float f0 = shared[tid];
float f1 = shared[tid + d];
if (f1 < f0)
{
shared[tid] = f1;
}
}
}
// Write result.
if (tid == 0) output[bid] = shared[0];
__syncthreads();
if (tid == 0) timer[bid+gridDim.x] = clock();
}
#define NUM_BLOCKS 64
#define NUM_THREADS 256
void xx(int argc, char** argv){
printf("CUDA Clock sample\n");
// This will pick the best possible CUDA capable device
int dev = findCudaDevice(argc, (const char **)argv);
float *dinput = NULL;
float *doutput = NULL;
clock_t *dtimer = NULL;
clock_t timer[NUM_BLOCKS * 2];
float input[NUM_THREADS * 2];
for (int i = 0; i < NUM_THREADS * 2; i++)
{
input[i] = (float)i;
}
checkCudaErrors(cudaMalloc((void **)&dinput, sizeof(float) * NUM_THREADS * 2));
checkCudaErrors(cudaMalloc((void **)&doutput, sizeof(float) * NUM_BLOCKS));
checkCudaErrors(cudaMalloc((void **)&dtimer, sizeof(clock_t) * NUM_BLOCKS * 2));
checkCudaErrors(cudaMemcpy(dinput, input, sizeof(float) * NUM_THREADS * 2, cudaMemcpyHostToDevice));
timedReduction<<<NUM_BLOCKS, NUM_THREADS, sizeof(float) * 2 *NUM_THREADS>>>(dinput, doutput, dtimer);
checkCudaErrors(cudaMemcpy(timer, dtimer, sizeof(clock_t) * NUM_BLOCKS * 2, cudaMemcpyDeviceToHost));
checkCudaErrors(cudaFree(dinput));
checkCudaErrors(cudaFree(doutput));
checkCudaErrors(cudaFree(dtimer));
long double avgElapsedClocks = 0;
for (int i = 0; i < NUM_BLOCKS; i++)
{
avgElapsedClocks += (long double) (timer[i + NUM_BLOCKS] - timer[i]);
}
avgElapsedClocks = avgElapsedClocks/NUM_BLOCKS;
printf("Average clocks/block = %Lf\n", avgElapsedClocks);
}
在我的主要工作中,我做的第一件事就是调用这个函数。这一次,当我做 'cmake' 和 'make 我得到这样的错误:
/home/cesar/Documents/dso/src/teste.cu:18:21: error: ‘threadIdx’ was not declared in this scope
const int tid = threadIdx.x;
/home/cesar/Documents/dso/src/teste.cu:19:21: error: ‘blockIdx’ was not declared in this scope
const int bid = blockIdx.x;
我已经正确安装了 CUDA 工具包,但这里是版本:
cesar@cesar-X550JX:/usr/local/cuda/bin$ /usr/local/cuda/bin/nvcc --version
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2017 NVIDIA Corporation
Built on Fri_Sep__1_21:08:03_CDT_2017
Cuda compilation tools, release 9.0, V9.0.176
你认为我做错了什么或者我错过了什么?由于其复杂性和定义明确的结构,我在适应 CMakeLists.txt 时遇到很多困难。
--- 编辑 ---
运行 make -j VERBOSE=1 我收到这条消息,告诉我正在使用常规的 c++ 编译器:
/usr/bin/c++ -fPIC -O3 -g -std=c++11 -D_FORCE_INLINES -shared -Wl,-soname,libdso.so -o lib/libdso.so CMakeFiles/dso.dir/src/FullSystem/FullSystem.cpp.o CMakeFiles/dso.dir/src/FullSystem/FullSystemOptimize.cpp.o CMakeFiles/dso.dir/src/FullSystem/FullSystemOptPoint.cpp.o CMakeFiles/dso.dir/src/FullSystem/FullSystemDebugStuff.cpp.o CMakeFiles/dso.dir/src/FullSystem/FullSystemMarginalize.cpp.o CMakeFiles/dso.dir/src/FullSystem/Residuals.cpp.o CMakeFiles/dso.dir/src/FullSystem/CoarseTracker.cpp.o CMakeFiles/dso.dir/src/FullSystem/CoarseInitializer.cpp.o CMakeFiles/dso.dir/src/FullSystem/ImmaturePoint.cpp.o CMakeFiles/dso.dir/src/FullSystem/HessianBlocks.cpp.o CMakeFiles/dso.dir/src/FullSystem/PixelSelector2.cpp.o CMakeFiles/dso.dir/src/OptimizationBackend/EnergyFunctional.cpp.o CMakeFiles/dso.dir/src/OptimizationBackend/AccumulatedTopHessian.cpp.o CMakeFiles/dso.dir/src/OptimizationBackend/AccumulatedSCHessian.cpp.o CMakeFiles/dso.dir/src/OptimizationBackend/EnergyFunctionalStructs.cpp.o CMakeFiles/dso.dir/src/util/settings.cpp.o CMakeFiles/dso.dir/src/util/Undistort.cpp.o CMakeFiles/dso.dir/src/util/globalCalib.cpp.o CMakeFiles/dso.dir/src/IOWrapper/OpenCV/ImageDisplay_OpenCV.cpp.o CMakeFiles/dso.dir/src/IOWrapper/OpenCV/ImageRW_OpenCV.cpp.o CMakeFiles/dso.dir/src/IOWrapper/Pangolin/KeyFrameDisplay.cpp.o CMakeFiles/dso.dir/src/IOWrapper/Pangolin/PangolinDSOViewer.cpp.o CMakeFiles/dso.dir/src/dso_generated_teste.cu.o /usr/local/cuda/lib64/libcudart_static.a -lpthread -ldl -lrt
[ 96%] Building CXX object CMakeFiles/dso_dataset.dir/src/main_dso_pangolin.cpp.o
/usr/bin/c++ -DENABLE_SSE -DHAS_ZIPLIB=1 -I/usr/include/opencv -I/home/cesar/Documents/dso/src -I/home/cesar/Documents/dso/thirdparty/Sophus -I/home/cesar/Documents/dso/thirdparty/sse2neon -I/usr/include/eigen3 -I/home/cesar/Documents/Pangolin/include -I/home/cesar/Documents/Pangolin/build/src/include -I/usr/local/include -I/usr/include/suitesparse -I/usr/local/cuda/include -O3 -g -std=c++11 -D_FORCE_INLINES -o CMakeFiles/dso_dataset.dir/src/main_dso_pangolin.cpp.o -c /home/cesar/Documents/dso/src/main_dso_pangolin.cpp
我还尝试将 .cpp 文件与 .cu 文件分开,对 .cpp 使用 add_library,对 .cu 文件使用 cuda_add_library,如下所示:
add_library(dso ${dso_SOURCE_FILES} ${dso_opencv_SOURCE_FILES} ${dso_pangolin_SOURCE_FILES})
cuda_add_library(my_cuda_lib ${PROJECT_SOURCE_DIR}/src/teste.cu)
然后在target_link_libraries中使用my_cuda_lib,像这样:
target_link_libraries(dso_dataset dso boost_system cxsparse ${BOOST_THREAD_LIBRARY} ${LIBZIP_LIBRARY} ${Pangolin_LIBRARIES} ${OpenCV_LIBS} ${CUDA_LIBRARIES} my_cuda_lib)
但还是出现同样的错误。
-- 编辑:MCVE ---
为了证明我的错误,我创建了一个简单的例子。我有 2 个简单文件,我的主要文件是 .cpp,我的 cuda 文件是 .cu。我的主要只是调用另一个文件上的函数,如下所示:
#include <iostream>
#include "hello_world.cu"
using namespace std;
int main()
{
teste();
return 0;
}
我的 .cu 文件如下所示:
#include <stdio.h>
#include <iostream>
// CUDA runtime
#include </usr/local/cuda-9.0/include/cuda_runtime.h>
// helper functions and utilities to work with CUDA
#include </usr/local/cuda-9.0/samples/common/inc/helper_functions.h>
#include </usr/local/cuda-9.0/samples/common/inc/helper_cuda.h>
__global__ void kernel (void){
extern __shared__ float shared[];
const int tid = threadIdx.x;
const int bid = blockIdx.x;
}
int teste( void ) {
kernel<<<1,1>>>();
printf( "Hello, World!\n" );
return 0;
}
我编译的 CMakeLists.txt 看起来像这样:
cmake_minimum_required(VERSION 2.8)
set(CUDA_HOST_COMPILER /usr/bin/g++-5)
find_package(CUDA QUIET REQUIRED)
# Pass options to NVCC
set(
CUDA_NVCC_FLAGS
${CUDA_NVCC_FLAGS};
-O3
)
# For compilation ...
# Specify target & source files to compile it from
cuda_add_executable(
helloworld
hello_world.cu
teste.cpp
)
制作 cmake 和 运行 "cmake --build ." 之后(我不知道为什么必须是这个命令,通常我只是做 make -j,但在这个例子中只有这个有效)我得到与我的项目相同的错误,“threadIdx”未在此范围内声明,与 'blockIdx' 等相同。
既然您在主代码中包含了 hello_world.cu 文件,那么您希望使用 nvcc 编译器对其进行编译。要实现这一点,请将 teste.cpp 文件的名称更改为 teste.cu(否则将使用 g++)。
同时从 CMakeLists.txt 中删除 'hello_world.cu'(它已经包含在 teste 文件中)以得到如下内容:
cuda_add_executable(
helloworld
teste.cu
)
那么应该可以了。
-- 编辑:附加问题--
如果您想保留您的 .cpp 文件,那么您需要将 g++ 可以为您做的和 nvcc 应该做的分开。所以你可以在你的项目中引入额外的 hello_world.h 文件:
#ifndef HELLO_WORLD_H
#define HELLO_WORLD_H
int teste();
#endif
将其包含在您的 teste.cpp 中:
#include <iostream>
#include "hello_world.h"
using namespace std;
int main()
{
teste();
return 0;
}
然后你的 CMakeLists.txt 看起来像你原来的例子:
...
cuda_add_executable(
helloworld
teste.cpp
hello_world.cu
)
在这种情况下 hello_world.cu 将使用 nvcc 编译,然后 teste.cpp 的编译和链接将由 g++ 完成(在这种情况下这是可能的,因为没有 CUDA 代码在 teste.cpp).