Arrow 在 Windows 上构建失败

Arrow build fails on Windows

我正在尝试在 Windows 上离线构建 Apache Arrow。按照网站上的说明,我已经下载了所有依赖项并设置了环境变量:

SET ARROW_BOOST_URL=%ARROW_DEPENDENCY_ROOT%boost-1.67.0.tar.gz
SET ARROW_BROTLI_URL=%ARROW_DEPENDENCY_ROOT%brotli-v1.0.7.tar.gz
SET ARROW_CARES_URL=%ARROW_DEPENDENCY_ROOT%cares-1.15.0.tar.gz
SET ARROW_DOUBLE_CONVERSION_URL=%ARROW_DEPENDENCY_ROOT%double-conversion-v3.1.4.tar.gz
SET ARROW_FLATBUFFERS_URL=%ARROW_DEPENDENCY_ROOT%flatbuffers-v1.10.0.tar.gz
SET ARROW_GBENCHMARK_URL=%ARROW_DEPENDENCY_ROOT%gbenchmark-v1.4.1.tar.gz
SET ARROW_GFLAGS_URL=%ARROW_DEPENDENCY_ROOT%gflags-v2.2.0.tar.gz
SET ARROW_GLOG_URL=%ARROW_DEPENDENCY_ROOT%glog-v0.3.5.tar.gz
SET ARROW_GRPC_URL=%ARROW_DEPENDENCY_ROOT%grpc-v1.20.0.tar.gz
SET ARROW_GTEST_URL=%ARROW_DEPENDENCY_ROOT%gtest-1.8.1.tar.gz
SET ARROW_JEMALLOC_URL=%ARROW_DEPENDENCY_ROOT%jemalloc-5.2.0.tar.gz
SET ARROW_LZ4_URL=%ARROW_DEPENDENCY_ROOT%lz4-v1.8.3.tar.gz
SET ARROW_ORC_URL=%ARROW_DEPENDENCY_ROOT%orc-1.5.5.tar.gz
SET ARROW_PROTOBUF_URL=%ARROW_DEPENDENCY_ROOT%protobuf-v3.7.1.tar.gz
SET ARROW_RAPIDJSON_URL=%ARROW_DEPENDENCY_ROOT%rapidjson-2bbd33b33217ff4a73434ebf10cdac41e2ef5e34.tar.gz
SET ARROW_RE2_URL=%ARROW_DEPENDENCY_ROOT%re2-2019-04-01.tar.gz
SET ARROW_SNAPPY_URL=%ARROW_DEPENDENCY_ROOT%snappy-1.1.7.tar.gz
SET ARROW_THRIFT_URL=%ARROW_DEPENDENCY_ROOT%thrift-0.12.0.tar.gz
SET ARROW_URIPARSER_URL=%ARROW_DEPENDENCY_ROOT%uriparser-0.9.2.tar.gz
SET ARROW_ZLIB_URL=%ARROW_DEPENDENCY_ROOT%zlib-1.2.11.tar.gz
SET ARROW_ZSTD_URL=%ARROW_DEPENDENCY_ROOT%zstd-v1.4.0.tar.gz

我使用以下命令进行构建:

if not defined DevEnvDir (
    call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Enterprise\Common7\Tools\VsDevCmd.bat" -arch=amd64 -host_arch=amd64
)

cmake -G %GENERATOR% %CMAKE_ARGS% ^
   -DCMAKE_BUILD_TYPE=%CONFIGURATION% ^
   -DARROW_BUILD_TESTS=OFF ^
   -DARROW_BUILD_EXAMPLES=OFF ^
   -DARROW_BUILD_STATIC=ON ^
   -DARROW_PARQUET=ON ^
   -DPARQUET_BUILD_EXECUTABLES=OFF ^
   -DARROW_IPC=OFF ^
   -DARROW_BUILD_UTILITIES=OFF ^
   -DARROW_HDFS=OFF ^
   -DARROW_CXXFLAGS="%ARROW_CXXFLAGS%" ^
   -DCMAKE_CXX_FLAGS_RELEASE="/MD %CMAKE_CXX_FLAGS_RELEASE%" ^
   -DCMAKE_VERBOSE_MAKEFILE=OFF ^
   -DARROW_BOOST_USE_SHARED=OFF ^
   -DCMAKE_VERBOSE_MAKEFILE=OFF ^
   ..  

cmake --build . --target install --config %CONFIGURATION%

我的变量设置如下:

set CMAKE_ARGS=%CMAKE_ARGS% ^
    -DARROW_DEPENDENCY_SOURCE=BUNDLED ^
    -DBOOST_SOURCE=SYSTEM ^
    -DTHRIFT_SOURCE=SYSTEM ^
    -DBOOST_DEBUG=OFF ^
    -DBOOST_USE_STATIC_LIBS=ON ^
    -DBOOST_LIBRARYDIR=%BOOST_LIBRARYDIR% ^
    -DBOOST_INCLUDEDIR=%BOOST_INCLUDEDIR% ^
    -DRAPIDJSON_ROOT=%RAPIDJSON_ROOTDIR% ^
    -DTHRIFT_ROOT=%THRIFT_ROOT%

当我运行上述命令时,会收到以下错误,提示找不到 rapidjson 头文件(尽管 rapidjson 已包含在上面的依赖项列表中):

cpp\src\arrow\json\chunker.cc(25): fatal error C1083: Cannot open include file: 'rapidjson/reader.h': No such file or directory

我可以通过手动把 rapidjson 头文件复制到源代码旁边来绕过这个问题(理想情况下我不想这样做,而是希望构建系统能自动找到它们)。即使完成这一步之后,我仍然会收到以下错误:


这似乎是由于 thrift 构建不当造成的:

Performing download step (verify and extract) for 'thrift_ep'
  -- verifying file...
       file='t:\src\apache-arrow\cpp\thirdparty\arrow-dependencies\thrift-0.12.0.tar.gz'
  -- verifying file... done
  -- extracting...
       src='T:/src/apache-arrow/cpp/thirdparty/arrow-dependencies/thrift-0.12.0.tar.gz'
       dst='T:/src/apache-arrow/cpp/build_Release/thrift_ep-prefix/src/thrift_ep'
  -- extracting... [tar xfz]
  -- extracting... [analysis]
  -- extracting... [rename]
  -- extracting... [clean up]
  -- extracting... done
  No update step for 'thrift_ep'
  No patch step for 'thrift_ep'
  Performing configure step for 'thrift_ep'
  -- The C compiler identification is MSVC 19.16.27031.1
  -- The CXX compiler identification is MSVC 19.16.27031.1
  -- Check for working C compiler: C:/Program Files (x86)/Microsoft Visual Studio/2017/Enterprise/VC/Tools/MSVC/14.16.27023/bin/Hostx86/x64/cl.exe
  -- Check for working C compiler: C:/Program Files (x86)/Microsoft Visual Studio/2017/Enterprise/VC/Tools/MSVC/14.16.27023/bin/Hostx86/x64/cl.exe -- works
  -- Detecting C compiler ABI info
  -- Detecting C compiler ABI info - done
  -- Detecting C compile features
  -- Detecting C compile features - done
  -- Check for working CXX compiler: C:/Program Files (x86)/Microsoft Visual Studio/2017/Enterprise/VC/Tools/MSVC/14.16.27023/bin/Hostx86/x64/cl.exe
  -- Check for working CXX compiler: C:/Program Files (x86)/Microsoft Visual Studio/2017/Enterprise/VC/Tools/MSVC/14.16.27023/bin/Hostx86/x64/cl.exe -- works
  -- Detecting CXX compiler ABI info
  -- Detecting CXX compiler ABI info - done
  -- Detecting CXX compile features
  -- Detecting CXX compile features - done
  -- Parsed Thrift package version: 0.12.0
  -- Parsed Thrift version: 0.12.0 (0.2.0)
  -- Setting C++11 as the default language level.
  -- To specify a different C++ language level, set CMAKE_CXX_STANDARD
  BOOST_ROOT:
  Boost_LIBRARY_DIR_RELEASE:
  BOOST_LIBRARYDIR:
  BOOST_INCLUDEDIR:
  Boost_USE_STATIC_LIBS: OFF
  Boost_DEBUG:
  -- libevent NOT found.
  -- Could NOT find RUN_HASKELL (missing: RUN_HASKELL)
  -- Could NOT find CABAL (missing: CABAL)
  -- Looking for arpa/inet.h
  -- Looking for arpa/inet.h - not found
  -- Looking for fcntl.h
  -- Looking for fcntl.h - found
  -- Looking for getopt.h
  -- Looking for getopt.h - not found
  -- Looking for inttypes.h
  -- Looking for inttypes.h - found
  -- Looking for netdb.h
  -- Looking for netdb.h - not found
  -- Looking for netinet/in.h
  -- Looking for netinet/in.h - not found
  -- Looking for signal.h
  -- Looking for signal.h - found
  -- Looking for stdint.h
  -- Looking for stdint.h - found
  -- Looking for unistd.h
  -- Looking for unistd.h - not found
  -- Looking for pthread.h
  -- Looking for pthread.h - not found
  -- Looking for sys/ioctl.h
  -- Looking for sys/ioctl.h - not found
  -- Looking for sys/param.h
  -- Looking for sys/param.h - not found
  -- Looking for sys/resource.h
  -- Looking for sys/resource.h - not found
  -- Looking for sys/socket.h
  -- Looking for sys/socket.h - not found
  -- Looking for sys/stat.h
  -- Looking for sys/stat.h - found
  -- Looking for sys/time.h
  -- Looking for sys/time.h - not found
  -- Looking for sys/un.h
  -- Looking for sys/un.h - not found
  -- Looking for poll.h
  -- Looking for poll.h - not found
  -- Looking for sys/poll.h
  -- Looking for sys/poll.h - not found
  -- Looking for sys/select.h
  -- Looking for sys/select.h - not found
  -- Looking for sched.h
  -- Looking for sched.h - not found
  -- Looking for string.h
  -- Looking for string.h - found
  -- Looking for strings.h
  -- Looking for strings.h - not found
  -- Looking for gethostbyname
  -- Looking for gethostbyname - not found
  -- Looking for gethostbyname_r
  -- Looking for gethostbyname_r - not found
  -- Looking for strerror_r
  -- Looking for strerror_r - not found
  -- Looking for sched_get_priority_max
  -- Looking for sched_get_priority_max - not found
  -- Looking for sched_get_priority_min
  -- Looking for sched_get_priority_min - not found
  -- Performing Test STRERROR_R_CHAR_P
  -- Performing Test STRERROR_R_CHAR_P - Failed
  -- Looking for pthread.h
  -- Looking for pthread.h - not found

为什么这些头文件都找不到?另外,为什么 Boost 库的路径没有被设置?在构建 Arrow 本身时,Boost 的路径是能够被自动解析的。

而其他依赖项(如 snappy 或 brotli)都能正常构建。知道为什么会这样吗?我在 Windows 上是否缺少某些依赖项?

非常感谢任何帮助。

第一个问题,关于 rapidjson:

根据 apache-arrow-0.14.1.tar.gz 中的 ThirdpartyToolchain.cmake,rapidjson 的构建取决于 ARROW_WITH_RAPIDJSON,而该选项只会在启用 ARROW_FLIGHT 或 ARROW_IPC 时自动设置。

所以你需要设置 ARROW_WITH_RAPIDJSON=ON、ARROW_FLIGHT=ON 或 ARROW_IPC=ON 三者之一。

第二个问题,关于 Boost:

这一点我只是猜测:我使用的是 BOOST_ROOT + BOOST_LIBRARYDIR,而不是我在您的代码中看到的 BOOST_LIBRARYDIR + BOOST_INCLUDEDIR。

ThirdpartyToolchain.cmake 的 build_thrift 宏中有以下代码,它可以解释你遇到的 Thrift 编译问题:

#Thrift also uses boost. Forward important boost settings if there were ones passed.
    if(DEFINED BOOST_ROOT)
      set(THRIFT_CMAKE_ARGS ${THRIFT_CMAKE_ARGS} "-DBOOST_ROOT=${BOOST_ROOT}")
    endif()