Arrow 构建在 windows 中失败
Arrow build fails in windows
我正在尝试在 Windows 上离线构建 Apache Arrow。按照网站上的说明,我已经下载了所有依赖项并设置了环境变量:
SET ARROW_BOOST_URL=%ARROW_DEPENDENCY_ROOT%boost-1.67.0.tar.gz
SET ARROW_BROTLI_URL=%ARROW_DEPENDENCY_ROOT%brotli-v1.0.7.tar.gz
SET ARROW_CARES_URL=%ARROW_DEPENDENCY_ROOT%cares-1.15.0.tar.gz
SET ARROW_DOUBLE_CONVERSION_URL=%ARROW_DEPENDENCY_ROOT%double-conversion-v3.1.4.tar.gz
SET ARROW_FLATBUFFERS_URL=%ARROW_DEPENDENCY_ROOT%flatbuffers-v1.10.0.tar.gz
SET ARROW_GBENCHMARK_URL=%ARROW_DEPENDENCY_ROOT%gbenchmark-v1.4.1.tar.gz
SET ARROW_GFLAGS_URL=%ARROW_DEPENDENCY_ROOT%gflags-v2.2.0.tar.gz
SET ARROW_GLOG_URL=%ARROW_DEPENDENCY_ROOT%glog-v0.3.5.tar.gz
SET ARROW_GRPC_URL=%ARROW_DEPENDENCY_ROOT%grpc-v1.20.0.tar.gz
SET ARROW_GTEST_URL=%ARROW_DEPENDENCY_ROOT%gtest-1.8.1.tar.gz
SET ARROW_JEMALLOC_URL=%ARROW_DEPENDENCY_ROOT%jemalloc-5.2.0.tar.gz
SET ARROW_LZ4_URL=%ARROW_DEPENDENCY_ROOT%lz4-v1.8.3.tar.gz
SET ARROW_ORC_URL=%ARROW_DEPENDENCY_ROOT%orc-1.5.5.tar.gz
SET ARROW_PROTOBUF_URL=%ARROW_DEPENDENCY_ROOT%protobuf-v3.7.1.tar.gz
SET ARROW_RAPIDJSON_URL=%ARROW_DEPENDENCY_ROOT%rapidjson-2bbd33b33217ff4a73434ebf10cdac41e2ef5e34.tar.gz
SET ARROW_RE2_URL=%ARROW_DEPENDENCY_ROOT%re2-2019-04-01.tar.gz
SET ARROW_SNAPPY_URL=%ARROW_DEPENDENCY_ROOT%snappy-1.1.7.tar.gz
SET ARROW_THRIFT_URL=%ARROW_DEPENDENCY_ROOT%thrift-0.12.0.tar.gz
SET ARROW_URIPARSER_URL=%ARROW_DEPENDENCY_ROOT%uriparser-0.9.2.tar.gz
SET ARROW_ZLIB_URL=%ARROW_DEPENDENCY_ROOT%zlib-1.2.11.tar.gz
SET ARROW_ZSTD_URL=%ARROW_DEPENDENCY_ROOT%zstd-v1.4.0.tar.gz
我使用以下命令进行构建:
if not defined DevEnvDir (
call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Enterprise\Common7\Tools\VsDevCmd.bat" -arch=amd64 -host_arch=amd64
)
cmake -G %GENERATOR% %CMAKE_ARGS% ^
-DCMAKE_BUILD_TYPE=%CONFIGURATION% ^
-DARROW_BUILD_TESTS=OFF ^
-DARROW_BUILD_EXAMPLES=OFF ^
-DARROW_BUILD_STATIC=ON ^
-DARROW_PARQUET=ON ^
-DPARQUET_BUILD_EXECUTABLES=OFF ^
-DARROW_IPC=OFF ^
-DARROW_BUILD_UTILITIES=OFF ^
-DARROW_HDFS=OFF ^
-DARROW_CXXFLAGS="%ARROW_CXXFLAGS%" ^
-DCMAKE_CXX_FLAGS_RELEASE="/MD %CMAKE_CXX_FLAGS_RELEASE%" ^
-DCMAKE_VERBOSE_MAKEFILE=OFF ^
-DARROW_BOOST_USE_SHARED=OFF ^
-DCMAKE_VERBOSE_MAKEFILE=OFF ^
..
cmake --build . --target install --config %CONFIGURATION%
我的变量设置如下:
set CMAKE_ARGS=%CMAKE_ARGS% ^
-DARROW_DEPENDENCY_SOURCE=BUNDLED ^
-DBOOST_SOURCE=SYSTEM ^
-DTHRIFT_SOURCE=SYSTEM ^
-DBOOST_DEBUG=OFF ^
-DBOOST_USE_STATIC_LIBS=ON ^
-DBOOST_LIBRARYDIR=%BOOST_LIBRARYDIR% ^
-DBOOST_INCLUDEDIR=%BOOST_INCLUDEDIR% ^
-DRAPIDJSON_ROOT=%RAPIDJSON_ROOTDIR% ^
-DTHRIFT_ROOT=%THRIFT_ROOT%
当我运行上述命令时,会收到以下错误,提示找不到 rapidjson 头文件(尽管 rapidjson 已包含在上面的依赖列表中):
cpp\src\arrow\json\chunker.cc(25): fatal error C1083: Cannot open include file: 'rapidjson/reader.h': No such file or directory
我可以通过手动把 rapidjson 头文件复制到源代码旁边来绕过这个问题(理想情况下我不想这样做,而是希望构建系统能自动找到它)。即使在这一步之后,我仍然会收到以下错误:
这似乎是由于 thrift 构建不当造成的:
Performing download step (verify and extract) for 'thrift_ep'
-- verifying file...
file='t:\src\apache-arrow\cpp\thirdparty\arrow-dependencies\thrift-0.12.0.tar.gz'
-- verifying file... done
-- extracting...
src='T:/src/apache-arrow/cpp/thirdparty/arrow-dependencies/thrift-0.12.0.tar.gz'
dst='T:/src/apache-arrow/cpp/build_Release/thrift_ep-prefix/src/thrift_ep'
-- extracting... [tar xfz]
-- extracting... [analysis]
-- extracting... [rename]
-- extracting... [clean up]
-- extracting... done
No update step for 'thrift_ep'
No patch step for 'thrift_ep'
Performing configure step for 'thrift_ep'
-- The C compiler identification is MSVC 19.16.27031.1
-- The CXX compiler identification is MSVC 19.16.27031.1
-- Check for working C compiler: C:/Program Files (x86)/Microsoft Visual Studio/2017/Enterprise/VC/Tools/MSVC/14.16.27023/bin/Hostx86/x64/cl.exe
-- Check for working C compiler: C:/Program Files (x86)/Microsoft Visual Studio/2017/Enterprise/VC/Tools/MSVC/14.16.27023/bin/Hostx86/x64/cl.exe -- works
-- Detecting C compiler ABI info
-- Detecting C compiler ABI info - done
-- Detecting C compile features
-- Detecting C compile features - done
-- Check for working CXX compiler: C:/Program Files (x86)/Microsoft Visual Studio/2017/Enterprise/VC/Tools/MSVC/14.16.27023/bin/Hostx86/x64/cl.exe
-- Check for working CXX compiler: C:/Program Files (x86)/Microsoft Visual Studio/2017/Enterprise/VC/Tools/MSVC/14.16.27023/bin/Hostx86/x64/cl.exe -- works
-- Detecting CXX compiler ABI info
-- Detecting CXX compiler ABI info - done
-- Detecting CXX compile features
-- Detecting CXX compile features - done
-- Parsed Thrift package version: 0.12.0
-- Parsed Thrift version: 0.12.0 (0.2.0)
-- Setting C++11 as the default language level.
-- To specify a different C++ language level, set CMAKE_CXX_STANDARD
BOOST_ROOT:
Boost_LIBRARY_DIR_RELEASE:
BOOST_LIBRARYDIR:
BOOST_INCLUDEDIR:
Boost_USE_STATIC_LIBS: OFF
Boost_DEBUG:
-- libevent NOT found.
-- Could NOT find RUN_HASKELL (missing: RUN_HASKELL)
-- Could NOT find CABAL (missing: CABAL)
-- Looking for arpa/inet.h
-- Looking for arpa/inet.h - not found
-- Looking for fcntl.h
-- Looking for fcntl.h - found
-- Looking for getopt.h
-- Looking for getopt.h - not found
-- Looking for inttypes.h
-- Looking for inttypes.h - found
-- Looking for netdb.h
-- Looking for netdb.h - not found
-- Looking for netinet/in.h
-- Looking for netinet/in.h - not found
-- Looking for signal.h
-- Looking for signal.h - found
-- Looking for stdint.h
-- Looking for stdint.h - found
-- Looking for unistd.h
-- Looking for unistd.h - not found
-- Looking for pthread.h
-- Looking for pthread.h - not found
-- Looking for sys/ioctl.h
-- Looking for sys/ioctl.h - not found
-- Looking for sys/param.h
-- Looking for sys/param.h - not found
-- Looking for sys/resource.h
-- Looking for sys/resource.h - not found
-- Looking for sys/socket.h
-- Looking for sys/socket.h - not found
-- Looking for sys/stat.h
-- Looking for sys/stat.h - found
-- Looking for sys/time.h
-- Looking for sys/time.h - not found
-- Looking for sys/un.h
-- Looking for sys/un.h - not found
-- Looking for poll.h
-- Looking for poll.h - not found
-- Looking for sys/poll.h
-- Looking for sys/poll.h - not found
-- Looking for sys/select.h
-- Looking for sys/select.h - not found
-- Looking for sched.h
-- Looking for sched.h - not found
-- Looking for string.h
-- Looking for string.h - found
-- Looking for strings.h
-- Looking for strings.h - not found
-- Looking for gethostbyname
-- Looking for gethostbyname - not found
-- Looking for gethostbyname_r
-- Looking for gethostbyname_r - not found
-- Looking for strerror_r
-- Looking for strerror_r - not found
-- Looking for sched_get_priority_max
-- Looking for sched_get_priority_max - not found
-- Looking for sched_get_priority_min
-- Looking for sched_get_priority_min - not found
-- Performing Test STRERROR_R_CHAR_P
-- Performing Test STRERROR_R_CHAR_P - Failed
-- Looking for pthread.h
-- Looking for pthread.h - not found
为什么找不到所有这些文件?另外,为什么没有设置 Boost 库的路径?构建 Arrow 本身时,Boost 的路径是会被自动解析的。
这是在其他依赖项(如 snappy 或 brotli)构建正常时发生的。知道为什么会这样吗?我在 Windows 中缺少依赖项吗?
非常感谢任何帮助。
第一个问题,关于 rapidjson:
根据 apache-arrow-0.14.1.tar.gz 中的 ThirdpartyToolchain.cmake,rapidjson 的构建取决于 ARROW_WITH_RAPIDJSON,而该选项会在 ARROW_FLIGHT OR ARROW_IPC 成立时自动设置(你的命令中指定了 -DARROW_IPC=OFF)。
所以你需要设置 ARROW_WITH_RAPIDJSON=ON、ARROW_FLIGHT=ON 或 ARROW_IPC=ON。
第二个问题,关于 BOOST:
这只是一个疑问,不过我使用的是 BOOST_ROOT+BOOST_LIBRARYDIR,而不是我在你的命令中看到的 BOOST_LIBRARYDIR+BOOST_INCLUDEDIR。
ThirdpartyToolchain.cmake 的 build_thrift 宏中有以下代码,它可以解释你的 Thrift 编译问题:
#Thrift also uses boost. Forward important boost settings if there were ones passed.
if(DEFINED BOOST_ROOT)
set(THRIFT_CMAKE_ARGS ${THRIFT_CMAKE_ARGS} "-DBOOST_ROOT=${BOOST_ROOT}")
endif()
我正在尝试在 Windows 上离线构建 Apache Arrow。按照网站上的说明,我已经下载了所有依赖项并设置了环境变量:
SET ARROW_BOOST_URL=%ARROW_DEPENDENCY_ROOT%boost-1.67.0.tar.gz
SET ARROW_BROTLI_URL=%ARROW_DEPENDENCY_ROOT%brotli-v1.0.7.tar.gz
SET ARROW_CARES_URL=%ARROW_DEPENDENCY_ROOT%cares-1.15.0.tar.gz
SET ARROW_DOUBLE_CONVERSION_URL=%ARROW_DEPENDENCY_ROOT%double-conversion-v3.1.4.tar.gz
SET ARROW_FLATBUFFERS_URL=%ARROW_DEPENDENCY_ROOT%flatbuffers-v1.10.0.tar.gz
SET ARROW_GBENCHMARK_URL=%ARROW_DEPENDENCY_ROOT%gbenchmark-v1.4.1.tar.gz
SET ARROW_GFLAGS_URL=%ARROW_DEPENDENCY_ROOT%gflags-v2.2.0.tar.gz
SET ARROW_GLOG_URL=%ARROW_DEPENDENCY_ROOT%glog-v0.3.5.tar.gz
SET ARROW_GRPC_URL=%ARROW_DEPENDENCY_ROOT%grpc-v1.20.0.tar.gz
SET ARROW_GTEST_URL=%ARROW_DEPENDENCY_ROOT%gtest-1.8.1.tar.gz
SET ARROW_JEMALLOC_URL=%ARROW_DEPENDENCY_ROOT%jemalloc-5.2.0.tar.gz
SET ARROW_LZ4_URL=%ARROW_DEPENDENCY_ROOT%lz4-v1.8.3.tar.gz
SET ARROW_ORC_URL=%ARROW_DEPENDENCY_ROOT%orc-1.5.5.tar.gz
SET ARROW_PROTOBUF_URL=%ARROW_DEPENDENCY_ROOT%protobuf-v3.7.1.tar.gz
SET ARROW_RAPIDJSON_URL=%ARROW_DEPENDENCY_ROOT%rapidjson-2bbd33b33217ff4a73434ebf10cdac41e2ef5e34.tar.gz
SET ARROW_RE2_URL=%ARROW_DEPENDENCY_ROOT%re2-2019-04-01.tar.gz
SET ARROW_SNAPPY_URL=%ARROW_DEPENDENCY_ROOT%snappy-1.1.7.tar.gz
SET ARROW_THRIFT_URL=%ARROW_DEPENDENCY_ROOT%thrift-0.12.0.tar.gz
SET ARROW_URIPARSER_URL=%ARROW_DEPENDENCY_ROOT%uriparser-0.9.2.tar.gz
SET ARROW_ZLIB_URL=%ARROW_DEPENDENCY_ROOT%zlib-1.2.11.tar.gz
SET ARROW_ZSTD_URL=%ARROW_DEPENDENCY_ROOT%zstd-v1.4.0.tar.gz
我使用以下命令进行构建:
if not defined DevEnvDir (
call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Enterprise\Common7\Tools\VsDevCmd.bat" -arch=amd64 -host_arch=amd64
)
cmake -G %GENERATOR% %CMAKE_ARGS% ^
-DCMAKE_BUILD_TYPE=%CONFIGURATION% ^
-DARROW_BUILD_TESTS=OFF ^
-DARROW_BUILD_EXAMPLES=OFF ^
-DARROW_BUILD_STATIC=ON ^
-DARROW_PARQUET=ON ^
-DPARQUET_BUILD_EXECUTABLES=OFF ^
-DARROW_IPC=OFF ^
-DARROW_BUILD_UTILITIES=OFF ^
-DARROW_HDFS=OFF ^
-DARROW_CXXFLAGS="%ARROW_CXXFLAGS%" ^
-DCMAKE_CXX_FLAGS_RELEASE="/MD %CMAKE_CXX_FLAGS_RELEASE%" ^
-DCMAKE_VERBOSE_MAKEFILE=OFF ^
-DARROW_BOOST_USE_SHARED=OFF ^
-DCMAKE_VERBOSE_MAKEFILE=OFF ^
..
cmake --build . --target install --config %CONFIGURATION%
我的变量设置如下:
set CMAKE_ARGS=%CMAKE_ARGS% ^
-DARROW_DEPENDENCY_SOURCE=BUNDLED ^
-DBOOST_SOURCE=SYSTEM ^
-DTHRIFT_SOURCE=SYSTEM ^
-DBOOST_DEBUG=OFF ^
-DBOOST_USE_STATIC_LIBS=ON ^
-DBOOST_LIBRARYDIR=%BOOST_LIBRARYDIR% ^
-DBOOST_INCLUDEDIR=%BOOST_INCLUDEDIR% ^
-DRAPIDJSON_ROOT=%RAPIDJSON_ROOTDIR% ^
-DTHRIFT_ROOT=%THRIFT_ROOT%
当我运行上述命令时,会收到以下错误,提示找不到 rapidjson 头文件(尽管 rapidjson 已包含在上面的依赖列表中):
cpp\src\arrow\json\chunker.cc(25): fatal error C1083: Cannot open include file: 'rapidjson/reader.h': No such file or directory
我可以通过手动把 rapidjson 头文件复制到源代码旁边来绕过这个问题(理想情况下我不想这样做,而是希望构建系统能自动找到它)。即使在这一步之后,我仍然会收到以下错误:
这似乎是由于 thrift 构建不当造成的:
Performing download step (verify and extract) for 'thrift_ep'
-- verifying file...
file='t:\src\apache-arrow\cpp\thirdparty\arrow-dependencies\thrift-0.12.0.tar.gz'
-- verifying file... done
-- extracting...
src='T:/src/apache-arrow/cpp/thirdparty/arrow-dependencies/thrift-0.12.0.tar.gz'
dst='T:/src/apache-arrow/cpp/build_Release/thrift_ep-prefix/src/thrift_ep'
-- extracting... [tar xfz]
-- extracting... [analysis]
-- extracting... [rename]
-- extracting... [clean up]
-- extracting... done
No update step for 'thrift_ep'
No patch step for 'thrift_ep'
Performing configure step for 'thrift_ep'
-- The C compiler identification is MSVC 19.16.27031.1
-- The CXX compiler identification is MSVC 19.16.27031.1
-- Check for working C compiler: C:/Program Files (x86)/Microsoft Visual Studio/2017/Enterprise/VC/Tools/MSVC/14.16.27023/bin/Hostx86/x64/cl.exe
-- Check for working C compiler: C:/Program Files (x86)/Microsoft Visual Studio/2017/Enterprise/VC/Tools/MSVC/14.16.27023/bin/Hostx86/x64/cl.exe -- works
-- Detecting C compiler ABI info
-- Detecting C compiler ABI info - done
-- Detecting C compile features
-- Detecting C compile features - done
-- Check for working CXX compiler: C:/Program Files (x86)/Microsoft Visual Studio/2017/Enterprise/VC/Tools/MSVC/14.16.27023/bin/Hostx86/x64/cl.exe
-- Check for working CXX compiler: C:/Program Files (x86)/Microsoft Visual Studio/2017/Enterprise/VC/Tools/MSVC/14.16.27023/bin/Hostx86/x64/cl.exe -- works
-- Detecting CXX compiler ABI info
-- Detecting CXX compiler ABI info - done
-- Detecting CXX compile features
-- Detecting CXX compile features - done
-- Parsed Thrift package version: 0.12.0
-- Parsed Thrift version: 0.12.0 (0.2.0)
-- Setting C++11 as the default language level.
-- To specify a different C++ language level, set CMAKE_CXX_STANDARD
BOOST_ROOT:
Boost_LIBRARY_DIR_RELEASE:
BOOST_LIBRARYDIR:
BOOST_INCLUDEDIR:
Boost_USE_STATIC_LIBS: OFF
Boost_DEBUG:
-- libevent NOT found.
-- Could NOT find RUN_HASKELL (missing: RUN_HASKELL)
-- Could NOT find CABAL (missing: CABAL)
-- Looking for arpa/inet.h
-- Looking for arpa/inet.h - not found
-- Looking for fcntl.h
-- Looking for fcntl.h - found
-- Looking for getopt.h
-- Looking for getopt.h - not found
-- Looking for inttypes.h
-- Looking for inttypes.h - found
-- Looking for netdb.h
-- Looking for netdb.h - not found
-- Looking for netinet/in.h
-- Looking for netinet/in.h - not found
-- Looking for signal.h
-- Looking for signal.h - found
-- Looking for stdint.h
-- Looking for stdint.h - found
-- Looking for unistd.h
-- Looking for unistd.h - not found
-- Looking for pthread.h
-- Looking for pthread.h - not found
-- Looking for sys/ioctl.h
-- Looking for sys/ioctl.h - not found
-- Looking for sys/param.h
-- Looking for sys/param.h - not found
-- Looking for sys/resource.h
-- Looking for sys/resource.h - not found
-- Looking for sys/socket.h
-- Looking for sys/socket.h - not found
-- Looking for sys/stat.h
-- Looking for sys/stat.h - found
-- Looking for sys/time.h
-- Looking for sys/time.h - not found
-- Looking for sys/un.h
-- Looking for sys/un.h - not found
-- Looking for poll.h
-- Looking for poll.h - not found
-- Looking for sys/poll.h
-- Looking for sys/poll.h - not found
-- Looking for sys/select.h
-- Looking for sys/select.h - not found
-- Looking for sched.h
-- Looking for sched.h - not found
-- Looking for string.h
-- Looking for string.h - found
-- Looking for strings.h
-- Looking for strings.h - not found
-- Looking for gethostbyname
-- Looking for gethostbyname - not found
-- Looking for gethostbyname_r
-- Looking for gethostbyname_r - not found
-- Looking for strerror_r
-- Looking for strerror_r - not found
-- Looking for sched_get_priority_max
-- Looking for sched_get_priority_max - not found
-- Looking for sched_get_priority_min
-- Looking for sched_get_priority_min - not found
-- Performing Test STRERROR_R_CHAR_P
-- Performing Test STRERROR_R_CHAR_P - Failed
-- Looking for pthread.h
-- Looking for pthread.h - not found
为什么找不到所有这些文件?另外,为什么没有设置 Boost 库的路径?构建 Arrow 本身时,Boost 的路径是会被自动解析的。
这是在其他依赖项(如 snappy 或 brotli)构建正常时发生的。知道为什么会这样吗?我在 Windows 中缺少依赖项吗?
非常感谢任何帮助。
第一个问题,关于 rapidjson:
根据 apache-arrow-0.14.1.tar.gz 中的 ThirdpartyToolchain.cmake,rapidjson 的构建取决于 ARROW_WITH_RAPIDJSON,而该选项会在 ARROW_FLIGHT OR ARROW_IPC 成立时自动设置(你的命令中指定了 -DARROW_IPC=OFF)。
所以你需要设置 ARROW_WITH_RAPIDJSON=ON、ARROW_FLIGHT=ON 或 ARROW_IPC=ON。
第二个问题,关于 BOOST:
这只是一个疑问,不过我使用的是 BOOST_ROOT+BOOST_LIBRARYDIR,而不是我在你的命令中看到的 BOOST_LIBRARYDIR+BOOST_INCLUDEDIR。
ThirdpartyToolchain.cmake 的 build_thrift 宏中有以下代码,它可以解释你的 Thrift 编译问题:
#Thrift also uses boost. Forward important boost settings if there were ones passed.
if(DEFINED BOOST_ROOT)
set(THRIFT_CMAKE_ARGS ${THRIFT_CMAKE_ARGS} "-DBOOST_ROOT=${BOOST_ROOT}")
endif()