1{ stdenv
2, lib
3, fetchurl
4, fetchFromGitHub
5, fixDarwinDylibNames
6, autoconf
7, aws-sdk-cpp
8, boost
9, brotli
10, c-ares
11, cmake
12, crc32c
13, curl
14, flatbuffers
15, gflags
16, glog
17, google-cloud-cpp
18, grpc
19, gtest
20, libbacktrace
21, lz4
22, minio
23, ninja
24, nlohmann_json
25, openssl
26, perl
27, protobuf
28, python3
29, rapidjson
30, re2
31, snappy
32, sqlite
33, thrift
34, tzdata
35, utf8proc
36, which
37, zlib
38, zstd
39, enableShared ? !stdenv.hostPlatform.isStatic
40, enableFlight ? true
41, enableJemalloc ? !stdenv.isDarwin
42 # boost/process is broken in 1.69 on darwin, but fixed in 1.70 and
43 # non-existent in older versions
44 # see https://github.com/boostorg/process/issues/55
45, enableS3 ? (!stdenv.isDarwin) || (lib.versionOlder boost.version "1.69" || lib.versionAtLeast boost.version "1.70")
46, enableGcs ? !stdenv.isDarwin # google-cloud-cpp is not supported on darwin
47}:
48
49assert lib.asserts.assertMsg
50 ((enableS3 && stdenv.isDarwin) -> (lib.versionOlder boost.version "1.69" || lib.versionAtLeast boost.version "1.70"))
51 "S3 on Darwin requires Boost != 1.69";
52
53let
54 arrow-testing = fetchFromGitHub {
55 owner = "apache";
56 repo = "arrow-testing";
57 rev = "5bab2f264a23f5af68f69ea93d24ef1e8e77fc88";
58 hash = "sha256-Pxx8ohUpXb5u1995IvXmxQMqWiDJ+7LAll/AjQP7ph8=";
59 };
60
61 parquet-testing = fetchFromGitHub {
62 owner = "apache";
63 repo = "parquet-testing";
64 rev = "aafd3fc9df431c2625a514fb46626e5614f1d199";
65 hash = "sha256-cO5t/mgsbBhbSefx8EMGTyxmgTjhZ8mFujkFQ3p/JS0=";
66 };
67
68in
69stdenv.mkDerivation rec {
70 pname = "arrow-cpp";
71 version = "9.0.0";
72
73 src = fetchurl {
74 url = "mirror://apache/arrow/arrow-${version}/apache-arrow-${version}.tar.gz";
75 hash = "sha256-qaAz8KNJAomZj0WGgNGVec8HkRcXumWv3my4AHD3qbU=";
76 };
77 sourceRoot = "apache-arrow-${version}/cpp";
78
79 # versions are all taken from
80 # https://github.com/apache/arrow/blob/apache-arrow-${version}/cpp/thirdparty/versions.txt
81
82 # jemalloc: arrow uses a custom prefix to prevent default allocator symbol
83 # collisions as well as custom build flags
84 ${if enableJemalloc then "ARROW_JEMALLOC_URL" else null} = fetchurl {
85 url = "https://github.com/jemalloc/jemalloc/releases/download/5.3.0/jemalloc-5.3.0.tar.bz2";
86 hash = "sha256-LbgtHnEZ3z5xt2QCGbbf6EeJvAU3mDw7esT3GJrs/qo=";
87 };
88
89 # mimalloc: arrow uses custom build flags for mimalloc
90 ARROW_MIMALLOC_URL = fetchFromGitHub {
91 owner = "microsoft";
92 repo = "mimalloc";
93 rev = "v2.0.6";
94 hash = "sha256-u2ITXABBN/dwU+mCIbL3tN1f4c17aBuSdNTV+Adtohc=";
95 };
96
97 ARROW_XSIMD_URL = fetchFromGitHub {
98 owner = "xtensor-stack";
99 repo = "xsimd";
100 rev = "8.1.0";
101 hash = "sha256-Aqs6XJkGjAjGAp0PprabSM4m+32M/UXpSHppCHdzaZk=";
102 };
103
104 ARROW_SUBSTRAIT_URL = fetchFromGitHub {
105 owner = "substrait-io";
106 repo = "substrait";
107 rev = "v0.6.0";
108 hash = "sha256-hxCBomL4Qg9cHLRg9ZiO9k+JVOZXn6f4ikPtK+V9tno=";
109 };
110
111 patches = [
112 # patch to fix python-test
113 ./darwin.patch
114 ];
115
116 nativeBuildInputs = [
117 cmake
118 ninja
119 autoconf # for vendored jemalloc
120 flatbuffers
121 ] ++ lib.optional stdenv.isDarwin fixDarwinDylibNames;
122 buildInputs = [
123 boost
124 brotli
125 flatbuffers
126 gflags
127 glog
128 gtest
129 libbacktrace
130 lz4
131 nlohmann_json # alternative JSON parser to rapidjson
132 protobuf # substrait requires protobuf
133 rapidjson
134 re2
135 snappy
136 thrift
137 utf8proc
138 zlib
139 zstd
140 ] ++ lib.optionals enableShared [
141 python3.pkgs.python
142 python3.pkgs.numpy
143 ] ++ lib.optionals enableFlight [
144 grpc
145 openssl
146 protobuf
147 ] ++ lib.optionals enableS3 [ aws-sdk-cpp openssl ]
148 ++ lib.optionals enableGcs [
149 crc32c
150 curl
151 google-cloud-cpp grpc
152 nlohmann_json
153 ];
154
155 preConfigure = ''
156 patchShebangs build-support/
157 substituteInPlace "src/arrow/vendored/datetime/tz.cpp" \
158 --replace 'discover_tz_dir();' '"${tzdata}/share/zoneinfo";'
159 '';
160
161 cmakeFlags = [
162 "-DARROW_BUILD_SHARED=${if enableShared then "ON" else "OFF"}"
163 "-DARROW_BUILD_STATIC=${if enableShared then "OFF" else "ON"}"
164 "-DARROW_BUILD_TESTS=ON"
165 "-DARROW_BUILD_INTEGRATION=ON"
166 "-DARROW_BUILD_UTILITIES=ON"
167 "-DARROW_EXTRA_ERROR_CONTEXT=ON"
168 "-DARROW_VERBOSE_THIRDPARTY_BUILD=ON"
169 "-DARROW_DEPENDENCY_SOURCE=SYSTEM"
170 "-Dxsimd_SOURCE=AUTO"
171 "-DARROW_DEPENDENCY_USE_SHARED=${if enableShared then "ON" else "OFF"}"
172 "-DARROW_COMPUTE=ON"
173 "-DARROW_CSV=ON"
174 "-DARROW_DATASET=ON"
175 "-DARROW_ENGINE=ON"
176 "-DARROW_FILESYSTEM=ON"
177 "-DARROW_FLIGHT_SQL=${if enableFlight then "ON" else "OFF"}"
178 "-DARROW_HDFS=ON"
179 "-DARROW_IPC=ON"
180 "-DARROW_JEMALLOC=${if enableJemalloc then "ON" else "OFF"}"
181 "-DARROW_JSON=ON"
182 "-DARROW_PLASMA=ON"
183 # Disable Python for static mode because openblas is currently broken there.
184 "-DARROW_PYTHON=${if enableShared then "ON" else "OFF"}"
185 "-DARROW_USE_GLOG=ON"
186 "-DARROW_WITH_BACKTRACE=ON"
187 "-DARROW_WITH_BROTLI=ON"
188 "-DARROW_WITH_LZ4=ON"
189 "-DARROW_WITH_NLOHMANN_JSON=ON"
190 "-DARROW_WITH_SNAPPY=ON"
191 "-DARROW_WITH_UTF8PROC=ON"
192 "-DARROW_WITH_ZLIB=ON"
193 "-DARROW_WITH_ZSTD=ON"
194 "-DARROW_MIMALLOC=ON"
195 # Parquet options:
196 "-DARROW_PARQUET=ON"
197 "-DARROW_SUBSTRAIT=ON"
198 "-DPARQUET_BUILD_EXECUTABLES=ON"
199 "-DARROW_FLIGHT=${if enableFlight then "ON" else "OFF"}"
200 "-DARROW_FLIGHT_TESTING=${if enableFlight then "ON" else "OFF"}"
201 "-DARROW_S3=${if enableS3 then "ON" else "OFF"}"
202 "-DARROW_GCS=${if enableGcs then "ON" else "OFF"}"
203 ] ++ lib.optionals (!enableShared) [
204 "-DARROW_TEST_LINKAGE=static"
205 ] ++ lib.optionals stdenv.isDarwin [
206 "-DCMAKE_INSTALL_RPATH=@loader_path/../lib" # needed for tools executables
207 ] ++ lib.optional (!stdenv.isx86_64) "-DARROW_USE_SIMD=OFF"
208 ++ lib.optional enableS3 "-DAWSSDK_CORE_HEADER_FILE=${aws-sdk-cpp}/include/aws/core/Aws.h";
209
210 doInstallCheck = true;
211 ARROW_TEST_DATA = lib.optionalString doInstallCheck "${arrow-testing}/data";
212 PARQUET_TEST_DATA = lib.optionalString doInstallCheck "${parquet-testing}/data";
213 GTEST_FILTER =
214 let
215 # Upstream Issue: https://issues.apache.org/jira/browse/ARROW-11398
216 filteredTests = lib.optionals stdenv.hostPlatform.isAarch64 [
217 "TestFilterKernelWithNumeric/3.CompareArrayAndFilterRandomNumeric"
218 "TestFilterKernelWithNumeric/7.CompareArrayAndFilterRandomNumeric"
219 "TestCompareKernel.PrimitiveRandomTests"
220 ] ++ lib.optionals enableS3 [
221 "S3OptionsTest.FromUri"
222 "S3RegionResolutionTest.NonExistentBucket"
223 "S3RegionResolutionTest.PublicBucket"
224 "S3RegionResolutionTest.RestrictedBucket"
225 "TestMinioServer.Connect"
226 "TestS3FS.*"
227 "TestS3FSGeneric.*"
228 ];
229 in
230 lib.optionalString doInstallCheck "-${builtins.concatStringsSep ":" filteredTests}";
231 __darwinAllowLocalNetworking = true;
232 installCheckInputs = [ perl which sqlite ] ++ lib.optional enableS3 minio;
233 installCheckPhase =
234 let
235 excludedTests = lib.optionals stdenv.isDarwin [
236 # Some plasma tests need to be patched to use a shorter AF_UNIX socket
237 # path on Darwin. See https://github.com/NixOS/nix/pull/1085
238 "plasma-external-store-tests"
239 "plasma-client-tests"
240 ] ++ [ "arrow-gcsfs-test" ];
241 in
242 ''
243 runHook preInstallCheck
244
245 ctest -L unittest \
246 --exclude-regex '^(${builtins.concatStringsSep "|" excludedTests})$'
247
248 runHook postInstallCheck
249 '';
250
251 meta = with lib; {
252 description = "A cross-language development platform for in-memory data";
253 homepage = "https://arrow.apache.org/docs/cpp/";
254 license = licenses.asl20;
255 platforms = platforms.unix;
256 maintainers = with maintainers; [ tobim veprbl cpcloud ];
257 };
258 passthru = {
259 inherit enableFlight enableJemalloc enableS3 enableGcs;
260 };
261}