# Package expression for the Apache Arrow C++ libraries.
#
# The arguments are the build/test dependencies, followed by four feature
# toggles (enableShared, enableFlight, enableS3, enableGcs).
{ stdenv
, lib
, fetchurl
, fetchFromGitHub
, fixDarwinDylibNames
, abseil-cpp
, autoconf
, aws-sdk-cpp
, boost
, brotli
, c-ares
, cmake
, crc32c
, curl
, flatbuffers
, gflags
, glog
, google-cloud-cpp
, grpc
, gtest
, jemalloc
, libnsl
, lz4
, minio
, nlohmann_json
, openssl
, perl
, protobuf
, python3
, rapidjson
, re2
, snappy
, thrift
, tzdata
, utf8proc
, which
, zlib
, zstd
  # Shared vs. static build: drives ARROW_BUILD_SHARED / ARROW_BUILD_STATIC,
  # ARROW_DEPENDENCY_USE_SHARED and ARROW_PYTHON, and adds the Python/NumPy
  # build inputs when true.
, enableShared ? !stdenv.hostPlatform.isStatic
  # Arrow Flight: sets ARROW_FLIGHT and adds grpc, libnsl, openssl and protobuf.
, enableFlight ? !stdenv.isDarwin # libnsl is not supported on darwin
  # S3 filesystem: sets ARROW_S3, adds aws-sdk-cpp/openssl, and minio for the
  # install check. The default is false only on Darwin with Boost 1.69.x:
  # boost/process is broken in 1.69 on darwin, but fixed in 1.70 and
  # non-existent in older versions
  # see https://github.com/boostorg/process/issues/55
, enableS3 ? (!stdenv.isDarwin) || (lib.versionOlder boost.version "1.69" || lib.versionAtLeast boost.version "1.70")
  # Google Cloud Storage filesystem: sets ARROW_GCS and adds abseil-cpp,
  # crc32c, curl, google-cloud-cpp and nlohmann_json.
, enableGcs ? !stdenv.isDarwin # google-cloud-cpp is not supported on darwin
}:
47
# Abort evaluation if S3 support is explicitly requested on Darwin together
# with Boost 1.69.x, whose boost/process is broken there
# (https://github.com/boostorg/process/issues/55).
assert lib.asserts.assertMsg
  (
    let
      s3OnDarwin = enableS3 && stdenv.isDarwin;
      boostIsNot169 =
        lib.versionOlder boost.version "1.69"
        || lib.versionAtLeast boost.version "1.70";
    in
    !s3OnDarwin || boostIsNot169
  )
  "S3 on Darwin requires Boost != 1.69";
51
52let
53 arrow-testing = fetchFromGitHub {
54 owner = "apache";
55 repo = "arrow-testing";
56 rev = "a60b715263d9bbf7e744527fb0c084b693f58043";
57 hash = "sha256-Dz1dCV0m5Y24qzXdVaqrZ7hK3MRSb4GF0PXrjMAsjZU=";
58 };
59
60 parquet-testing = fetchFromGitHub {
61 owner = "apache";
62 repo = "parquet-testing";
63 rev = "d4d485956a643c693b5549e1a62d52ca61c170f1";
64 hash = "sha256-GmOAS8gGhzDI0WzORMkWHRRUl/XBwmNen2d3VefZxxc=";
65 };
66
67in
68stdenv.mkDerivation rec {
  pname = "arrow-cpp";
  version = "6.0.0";

  # Apache Arrow release tarball for `version`, fetched via the apache mirror
  # URL scheme.
  src = fetchurl {
    url =
      "mirror://apache/arrow/arrow-${version}/apache-arrow-${version}.tar.gz";
    hash = "sha256-adJo+egtPr71la0b3IPUywKyDBgZRqaGMfZkXXwfepA=";
  };
  # Build from the cpp/ subdirectory of the unpacked tarball.
  sourceRoot = "apache-arrow-${version}/cpp";
78
  # Pre-fetched sources for the dependencies Arrow builds itself.
  # NOTE(review): presumably these *_URL variables are read by
  # ThirdpartyToolchain.cmake in place of downloading — confirm upstream.

  # jemalloc source (autoconf is in nativeBuildInputs for the vendored
  # jemalloc build).
  ARROW_JEMALLOC_URL = jemalloc.src;

  # mimalloc source; ARROW_MIMALLOC is switched on in cmakeFlags.
  ARROW_MIMALLOC_URL = fetchFromGitHub {
    # From
    # ./cpp/cmake_modules/ThirdpartyToolchain.cmake
    # ./cpp/thirdparty/versions.txt
    # NOTE(review): presumably `rev` must match what those files request for
    # this Arrow release — re-check when bumping `version`.
    owner = "microsoft";
    repo = "mimalloc";
    rev = "v1.7.2";
    hash = "sha256-yHupYFgC8mJuLUSpuEAfwF7l6Ue4EiuO1Q4qN4T6wWc=";
  };

  # xsimd source, pinned to an exact commit.
  # NOTE(review): no upstream reference is recorded for this pin; presumably
  # it comes from the same files as mimalloc — TODO confirm.
  ARROW_XSIMD_URL = fetchFromGitHub {
    owner = "xtensor-stack";
    repo = "xsimd";
    rev = "aeec9c872c8b475dedd7781336710f2dd2666cb2";
    hash = "sha256-vWKdJkieKhaxyAJhijXUmD7NmNvMWd79PskQojulA1w=";
  };
97
  patches = [
    # patch to fix python-test
    # NOTE(review): the file name suggests the fix is Darwin-specific, but the
    # patch is applied on every platform — confirm against darwin.patch.
    ./darwin.patch
  ];
102
103 nativeBuildInputs = [
104 cmake
105 autoconf # for vendored jemalloc
106 flatbuffers
107 ] ++ lib.optional stdenv.isDarwin fixDarwinDylibNames;
108 buildInputs = [
109 boost
110 brotli
111 flatbuffers
112 gflags
113 glog
114 gtest
115 lz4
116 rapidjson
117 re2
118 snappy
119 thrift
120 utf8proc
121 zlib
122 zstd
123 ] ++ lib.optionals enableShared [
124 python3.pkgs.python
125 python3.pkgs.numpy
126 ] ++ lib.optionals enableFlight [
127 grpc
128 libnsl
129 openssl
130 protobuf
131 ] ++ lib.optionals enableS3 [ aws-sdk-cpp openssl ]
132 ++ lib.optionals enableGcs [
133 abseil-cpp
134 crc32c
135 curl
136 google-cloud-cpp
137 nlohmann_json
138 ];
139
  # Before configuring: patch the interpreter lines of the scripts under
  # build-support/, and make the vendored datetime code look for time-zone
  # data in the tzdata package instead of /usr/share/zoneinfo.
  preConfigure = ''
    patchShebangs build-support/
    substituteInPlace "src/arrow/vendored/datetime/tz.cpp" \
      --replace "/usr/share/zoneinfo" "${tzdata}/share/zoneinfo"
  '';
145
146 cmakeFlags = [
147 "-DCMAKE_FIND_PACKAGE_PREFER_CONFIG=ON"
148 "-DARROW_BUILD_SHARED=${if enableShared then "ON" else "OFF"}"
149 "-DARROW_BUILD_STATIC=${if enableShared then "OFF" else "ON"}"
150 "-DARROW_BUILD_TESTS=ON"
151 "-DARROW_VERBOSE_THIRDPARTY_BUILD=ON"
152 "-DARROW_DEPENDENCY_SOURCE=SYSTEM"
153 "-DThrift_SOURCE=AUTO" # search for Thrift using pkg-config (ThriftConfig.cmake requires OpenSSL and libevent)
154 "-DARROW_DEPENDENCY_USE_SHARED=${if enableShared then "ON" else "OFF"}"
155 "-DARROW_COMPUTE=ON"
156 "-DARROW_CSV=ON"
157 "-DARROW_DATASET=ON"
158 "-DARROW_JSON=ON"
159 "-DARROW_PLASMA=ON"
160 # Disable Python for static mode because openblas is currently broken there.
161 "-DARROW_PYTHON=${if enableShared then "ON" else "OFF"}"
162 "-DARROW_USE_GLOG=ON"
163 "-DARROW_WITH_BROTLI=ON"
164 "-DARROW_WITH_LZ4=ON"
165 "-DARROW_WITH_SNAPPY=ON"
166 "-DARROW_WITH_UTF8PROC=ON"
167 "-DARROW_WITH_ZLIB=ON"
168 "-DARROW_WITH_ZSTD=ON"
169 "-DARROW_MIMALLOC=ON"
170 # Parquet options:
171 "-DARROW_PARQUET=ON"
172 "-DPARQUET_BUILD_EXECUTABLES=ON"
173 "-DARROW_FLIGHT=${if enableFlight then "ON" else "OFF"}"
174 "-DARROW_S3=${if enableS3 then "ON" else "OFF"}"
175 "-DARROW_GCS=${if enableGcs then "ON" else "OFF"}"
176 ] ++ lib.optionals (!enableShared) [
177 "-DARROW_TEST_LINKAGE=static"
178 ] ++ lib.optionals stdenv.isDarwin [
179 "-DCMAKE_SKIP_BUILD_RPATH=OFF" # needed for tests
180 "-DCMAKE_INSTALL_RPATH=@loader_path/../lib" # needed for tools executables
181 ] ++ lib.optional (!stdenv.isx86_64) "-DARROW_USE_SIMD=OFF"
182 ++ lib.optional enableS3 "-DAWSSDK_CORE_HEADER_FILE=${aws-sdk-cpp}/include/aws/core/Aws.h";
183
184 doInstallCheck = true;
185 ARROW_TEST_DATA = lib.optionalString doInstallCheck "${arrow-testing}/data";
186 PARQUET_TEST_DATA = lib.optionalString doInstallCheck "${parquet-testing}/data";
187 GTEST_FILTER =
188 let
189 # Upstream Issue: https://issues.apache.org/jira/browse/ARROW-11398
190 filteredTests = lib.optionals stdenv.hostPlatform.isAarch64 [
191 "TestFilterKernelWithNumeric/3.CompareArrayAndFilterRandomNumeric"
192 "TestFilterKernelWithNumeric/7.CompareArrayAndFilterRandomNumeric"
193 "TestCompareKernel.PrimitiveRandomTests"
194 ] ++ lib.optionals enableS3 [
195 "S3OptionsTest.FromUri"
196 "S3RegionResolutionTest.NonExistentBucket"
197 "S3RegionResolutionTest.PublicBucket"
198 "S3RegionResolutionTest.RestrictedBucket"
199 "TestMinioServer.Connect"
200 "TestS3FS.OpenOutputStreamBackgroundWrites"
201 "TestS3FS.OpenOutputStreamDestructorBackgroundWrites"
202 "TestS3FS.OpenOutputStreamDestructorSyncWrite"
203 "TestS3FS.OpenOutputStreamDestructorSyncWrites"
204 "TestS3FS.OpenOutputStreamMetadata"
205 "TestS3FS.OpenOutputStreamSyncWrites"
206 "TestS3FSGeneric.*"
207 ] ++ lib.optionals enableGcs [
208 "GcsFileSystem.FileSystemCompare"
209 "GcsIntegrationTest.*"
210 ];
211 in
212 lib.optionalString doInstallCheck "-${builtins.concatStringsSep ":" filteredTests}";
213 installCheckInputs = [ perl which ] ++ lib.optional enableS3 minio;
214 installCheckPhase =
215 let
216 excludedTests = lib.optionals stdenv.isDarwin [
217 # Some plasma tests need to be patched to use a shorter AF_UNIX socket
218 # path on Darwin. See https://github.com/NixOS/nix/pull/1085
219 "plasma-external-store-tests"
220 "plasma-client-tests"
221 ];
222 in
223 ''
224 runHook preInstallCheck
225
226 ctest -L unittest \
227 --exclude-regex '^(${builtins.concatStringsSep "|" excludedTests})$'
228
229 runHook postInstallCheck
230 '';
231
232 meta = with lib; {
233 description = "A cross-language development platform for in-memory data";
234 homepage = "https://arrow.apache.org/";
235 license = licenses.asl20;
236 platforms = platforms.unix;
237 maintainers = with maintainers; [ tobim veprbl cpcloud ];
238 };
239}