1# This is is arrow-cpp < 20 used as a workaround for
2# Ceph not supporting >= yet, taken from nixpkgs commit
3# 97ae53798f6a7c7c3c259ad8c2cbcede6ca34b2a~
4# This should be entirely removed when upstream bug
5# https://tracker.ceph.com/issues/71269
6# is fixed.
7{
8 stdenv,
9 lib,
10 fetchurl,
11 fetchFromGitHub,
12 fixDarwinDylibNames,
13 autoconf,
14 aws-sdk-cpp,
15 aws-sdk-cpp-arrow ? aws-sdk-cpp.override {
16 apis = [
17 "cognito-identity"
18 "config"
19 "identity-management"
20 "s3"
21 "sts"
22 "transfer"
23 ];
24 },
25 boost,
26 brotli,
27 bzip2,
28 cmake,
29 crc32c,
30 curl,
31 flatbuffers,
32 gflags,
33 glog,
34 google-cloud-cpp,
35 grpc,
36 gtest,
37 libbacktrace,
38 lz4,
39 minio,
40 ninja,
41 nlohmann_json,
42 openssl,
43 perl,
44 pkg-config,
45 protobuf_29,
46 python3,
47 rapidjson,
48 re2,
49 snappy,
50 sqlite,
51 thrift,
52 tzdata,
53 utf8proc,
54 which,
55 zlib,
56 zstd,
57 testers,
58 enableShared ? !stdenv.hostPlatform.isStatic,
59 enableFlight ? stdenv.buildPlatform == stdenv.hostPlatform,
60 # Disable also on RiscV
61 # configure: error: cannot determine number of significant virtual address bits
62 enableJemalloc ?
63 !stdenv.hostPlatform.isDarwin && !stdenv.hostPlatform.isAarch64 && !stdenv.hostPlatform.isRiscV64,
64 enableS3 ? true,
65 # google-cloud-cpp fails to build on RiscV
66 enableGcs ? !stdenv.hostPlatform.isDarwin && !stdenv.hostPlatform.isRiscV64,
67}:
68
69let
70 # https://github.com/apache/arrow/issues/45807
71 protobuf = protobuf_29;
72
73 arrow-testing = fetchFromGitHub {
74 name = "arrow-testing";
75 owner = "apache";
76 repo = "arrow-testing";
77 rev = "4d209492d514c2d3cb2d392681b9aa00e6d8da1c";
78 hash = "sha256-IkiCbuy0bWyClPZ4ZEdkEP7jFYLhM7RCuNLd6Lazd4o=";
79 };
80
81 parquet-testing = fetchFromGitHub {
82 name = "parquet-testing";
83 owner = "apache";
84 repo = "parquet-testing";
85 rev = "c7cf1374cf284c0c73024cd1437becea75558bf8";
86 hash = "sha256-DThjyZ34LajHwXZy1IhYKUGUG/ejQ9WvBNuI8eUKmSs=";
87 };
88
89 version = "19.0.1";
90in
91stdenv.mkDerivation (finalAttrs: {
92 pname = "arrow-cpp";
93 inherit version;
94
95 src = fetchFromGitHub {
96 owner = "apache";
97 repo = "arrow";
98 rev = "apache-arrow-${version}";
99 hash = "sha256-toHwUIOZRpgR0K7pQtT5nqWpO9G7AuHYTcvA6UVg9lA=";
100 };
101
102 sourceRoot = "${finalAttrs.src.name}/cpp";
103
104 # versions are all taken from
105 # https://github.com/apache/arrow/blob/apache-arrow-${version}/cpp/thirdparty/versions.txt
106
107 # jemalloc: arrow uses a custom prefix to prevent default allocator symbol
108 # collisions as well as custom build flags
109 ${if enableJemalloc then "ARROW_JEMALLOC_URL" else null} = fetchurl {
110 url = "https://github.com/jemalloc/jemalloc/releases/download/5.3.0/jemalloc-5.3.0.tar.bz2";
111 hash = "sha256-LbgtHnEZ3z5xt2QCGbbf6EeJvAU3mDw7esT3GJrs/qo=";
112 };
113
114 # mimalloc: arrow uses custom build flags for mimalloc
115 ARROW_MIMALLOC_URL = fetchFromGitHub {
116 owner = "microsoft";
117 repo = "mimalloc";
118 rev = "v2.0.6";
119 hash = "sha256-u2ITXABBN/dwU+mCIbL3tN1f4c17aBuSdNTV+Adtohc=";
120 };
121
122 ARROW_XSIMD_URL = fetchFromGitHub {
123 owner = "xtensor-stack";
124 repo = "xsimd";
125 rev = "13.0.0";
126 hash = "sha256-qElJYW5QDj3s59L3NgZj5zkhnUMzIP2mBa1sPks3/CE=";
127 };
128
129 ARROW_SUBSTRAIT_URL = fetchFromGitHub {
130 owner = "substrait-io";
131 repo = "substrait";
132 rev = "v0.44.0";
133 hash = "sha256-V739IFTGPtbGPlxcOi8sAaYSDhNUEpITvN9IqdPReug=";
134 };
135
136 nativeBuildInputs = [
137 cmake
138 pkg-config
139 ninja
140 autoconf # for vendored jemalloc
141 flatbuffers
142 ]
143 ++ lib.optional stdenv.hostPlatform.isDarwin fixDarwinDylibNames;
144 buildInputs = [
145 boost
146 brotli
147 bzip2
148 flatbuffers
149 gflags
150 glog
151 gtest
152 libbacktrace
153 lz4
154 nlohmann_json # alternative JSON parser to rapidjson
155 protobuf # substrait requires protobuf
156 rapidjson
157 re2
158 snappy
159 thrift
160 utf8proc
161 zlib
162 zstd
163 ]
164 ++ lib.optionals enableFlight [
165 grpc
166 openssl
167 protobuf
168 sqlite
169 ]
170 ++ lib.optionals enableS3 [
171 aws-sdk-cpp-arrow
172 openssl
173 ]
174 ++ lib.optionals enableGcs [
175 crc32c
176 curl
177 google-cloud-cpp
178 grpc
179 nlohmann_json
180 ];
181
182 preConfigure = ''
183 patchShebangs build-support/
184 substituteInPlace "src/arrow/vendored/datetime/tz.cpp" \
185 --replace-fail 'discover_tz_dir();' '"${tzdata}/share/zoneinfo";'
186 '';
187
188 cmakeFlags = [
189 "-DCMAKE_FIND_PACKAGE_PREFER_CONFIG=ON"
190 "-DARROW_BUILD_SHARED=${if enableShared then "ON" else "OFF"}"
191 "-DARROW_BUILD_STATIC=${if enableShared then "OFF" else "ON"}"
192 "-DARROW_BUILD_TESTS=${if enableShared then "ON" else "OFF"}"
193 "-DARROW_BUILD_INTEGRATION=ON"
194 "-DARROW_BUILD_UTILITIES=ON"
195 "-DARROW_EXTRA_ERROR_CONTEXT=ON"
196 "-DARROW_VERBOSE_THIRDPARTY_BUILD=ON"
197 "-DARROW_DEPENDENCY_SOURCE=SYSTEM"
198 "-Dxsimd_SOURCE=AUTO"
199 "-DARROW_DEPENDENCY_USE_SHARED=${if enableShared then "ON" else "OFF"}"
200 "-DARROW_COMPUTE=ON"
201 "-DARROW_CSV=ON"
202 "-DARROW_DATASET=ON"
203 "-DARROW_FILESYSTEM=ON"
204 "-DARROW_FLIGHT_SQL=${if enableFlight then "ON" else "OFF"}"
205 "-DARROW_HDFS=ON"
206 "-DARROW_IPC=ON"
207 "-DARROW_JEMALLOC=${if enableJemalloc then "ON" else "OFF"}"
208 "-DARROW_JSON=ON"
209 "-DARROW_USE_GLOG=ON"
210 "-DARROW_WITH_BACKTRACE=ON"
211 "-DARROW_WITH_BROTLI=ON"
212 "-DARROW_WITH_BZ2=ON"
213 "-DARROW_WITH_LZ4=ON"
214 "-DARROW_WITH_NLOHMANN_JSON=ON"
215 "-DARROW_WITH_SNAPPY=ON"
216 "-DARROW_WITH_UTF8PROC=ON"
217 "-DARROW_WITH_ZLIB=ON"
218 "-DARROW_WITH_ZSTD=ON"
219 "-DARROW_MIMALLOC=ON"
220 "-DARROW_SUBSTRAIT=ON"
221 "-DARROW_FLIGHT=${if enableFlight then "ON" else "OFF"}"
222 "-DARROW_FLIGHT_TESTING=${if enableFlight then "ON" else "OFF"}"
223 "-DARROW_S3=${if enableS3 then "ON" else "OFF"}"
224 "-DARROW_GCS=${if enableGcs then "ON" else "OFF"}"
225 # Parquet options:
226 "-DARROW_PARQUET=ON"
227 "-DPARQUET_BUILD_EXECUTABLES=ON"
228 "-DPARQUET_REQUIRE_ENCRYPTION=ON"
229 ]
230 ++ lib.optionals (!enableShared) [ "-DARROW_TEST_LINKAGE=static" ]
231 ++ lib.optionals stdenv.hostPlatform.isDarwin [
232 "-DCMAKE_INSTALL_RPATH=@loader_path/../lib" # needed for tools executables
233 ]
234 ++ lib.optionals (!stdenv.hostPlatform.isx86_64) [ "-DARROW_USE_SIMD=OFF" ]
235 ++ lib.optionals enableS3 [
236 "-DAWSSDK_CORE_HEADER_FILE=${aws-sdk-cpp-arrow}/include/aws/core/Aws.h"
237 ];
238
239 doInstallCheck = true;
240 ARROW_TEST_DATA = lib.optionalString finalAttrs.doInstallCheck "${arrow-testing}/data";
241 PARQUET_TEST_DATA = lib.optionalString finalAttrs.doInstallCheck "${parquet-testing}/data";
242 GTEST_FILTER =
243 let
244 # Upstream Issue: https://issues.apache.org/jira/browse/ARROW-11398
245 filteredTests =
246 lib.optionals stdenv.hostPlatform.isAarch64 [
247 "TestFilterKernelWithNumeric/3.CompareArrayAndFilterRandomNumeric"
248 "TestFilterKernelWithNumeric/7.CompareArrayAndFilterRandomNumeric"
249 "TestCompareKernel.PrimitiveRandomTests"
250 ]
251 ++ lib.optionals enableS3 [
252 "S3OptionsTest.FromUri"
253 "S3RegionResolutionTest.NonExistentBucket"
254 "S3RegionResolutionTest.PublicBucket"
255 "S3RegionResolutionTest.RestrictedBucket"
256 "TestMinioServer.Connect"
257 "TestS3FS.*"
258 "TestS3FSGeneric.*"
259 ]
260 ++ lib.optionals stdenv.hostPlatform.isDarwin [
261 # TODO: revisit at 12.0.0 or when
262 # https://github.com/apache/arrow/commit/295c6644ca6b67c95a662410b2c7faea0920c989
263 # is available, see
264 # https://github.com/apache/arrow/pull/15288#discussion_r1071244661
265 "ExecPlanExecution.StressSourceSinkStopped"
266 ];
267 in
268 lib.optionalString finalAttrs.doInstallCheck "-${lib.concatStringsSep ":" filteredTests}";
269
270 __darwinAllowLocalNetworking = true;
271
272 nativeInstallCheckInputs = [
273 perl
274 which
275 sqlite
276 ]
277 ++ lib.optionals enableS3 [ minio ]
278 ++ lib.optionals enableFlight [ python3 ];
279
280 installCheckPhase =
281 let
282 disabledTests = [
283 # flaky
284 "arrow-flight-test"
285 # requires networking
286 "arrow-gcsfs-test"
287 "arrow-flight-integration-test"
288 ];
289 in
290 ''
291 runHook preInstallCheck
292
293 ctest -L unittest --exclude-regex '^(${lib.concatStringsSep "|" disabledTests})$'
294
295 runHook postInstallCheck
296 '';
297
298 meta = with lib; {
299 description = "Cross-language development platform for in-memory data";
300 homepage = "https://arrow.apache.org/docs/cpp/";
301 license = licenses.asl20;
302 platforms = platforms.unix;
303 maintainers = with maintainers; [
304 tobim
305 veprbl
306 cpcloud
307 ];
308 pkgConfigModules = [
309 "arrow"
310 "arrow-acero"
311 "arrow-compute"
312 "arrow-csv"
313 "arrow-dataset"
314 "arrow-filesystem"
315 "arrow-flight"
316 "arrow-flight-sql"
317 "arrow-flight-testing"
318 "arrow-json"
319 "arrow-substrait"
320 "arrow-testing"
321 "parquet"
322 ];
323 };
324 passthru = {
325 inherit
326 enableFlight
327 enableJemalloc
328 enableS3
329 enableGcs
330 ;
331 tests.pkg-config = testers.testMetaPkgConfig finalAttrs.finalPackage;
332 };
333})