# This is arrow-cpp < 20, used as a workaround for
# Ceph not supporting arrow-cpp >= 20 yet. Taken from nixpkgs commit
3# 97ae53798f6a7c7c3c259ad8c2cbcede6ca34b2a~
4# This should be entirely removed when upstream bug
5# https://tracker.ceph.com/issues/71269
6# is fixed.
{
  stdenv,
  lib,
  fetchurl,
  fetchpatch2,
  fetchFromGitHub,
  fixDarwinDylibNames,
  autoconf,
  aws-sdk-cpp,
  # aws-sdk-cpp overridden with an explicit `apis` list; callers may pass
  # their own SDK build instead.
  aws-sdk-cpp-arrow ? aws-sdk-cpp.override {
    apis = [
      "cognito-identity"
      "config"
      "identity-management"
      "s3"
      "sts"
      "transfer"
    ];
  },
  boost,
  brotli,
  bzip2,
  cmake,
  crc32c,
  curl,
  flatbuffers,
  gflags,
  glog,
  google-cloud-cpp,
  grpc,
  gtest,
  libbacktrace,
  lz4,
  minio,
  ninja,
  nlohmann_json,
  openssl,
  perl,
  pkg-config,
  protobuf,
  python3,
  rapidjson,
  re2,
  snappy,
  sqlite,
  thrift,
  tzdata,
  utf8proc,
  which,
  zlib,
  zstd,
  testers,
  # Shared libraries unless the host platform is static-only.
  enableShared ? !stdenv.hostPlatform.isStatic,
  # Flight defaults to on only when build and host platform are the same.
  enableFlight ? stdenv.buildPlatform == stdenv.hostPlatform,
  # jemalloc is off on Darwin and AArch64, and also on RISC-V, where it fails with
  #   configure: error: cannot determine number of significant virtual address bits
  enableJemalloc ?
    !(
      stdenv.hostPlatform.isDarwin || stdenv.hostPlatform.isAarch64 || stdenv.hostPlatform.isRiscV64
    ),
  enableS3 ? true,
  # Off on Darwin; google-cloud-cpp also fails to build on RISC-V.
  enableGcs ? !(stdenv.hostPlatform.isDarwin || stdenv.hostPlatform.isRiscV64),
}:
69
let
  # Fetch a pinned revision of a repository owned by the `apache` GitHub
  # organisation; the fetched source is named after the repository.
  fetchApachePinned =
    {
      repo,
      rev,
      hash,
    }:
    fetchFromGitHub {
      owner = "apache";
      name = repo;
      inherit repo rev hash;
    };

  # Test fixtures; their `data` directories are exported to the test suite
  # through ARROW_TEST_DATA / PARQUET_TEST_DATA in the derivation below.
  arrow-testing = fetchApachePinned {
    repo = "arrow-testing";
    rev = "4d209492d514c2d3cb2d392681b9aa00e6d8da1c";
    hash = "sha256-IkiCbuy0bWyClPZ4ZEdkEP7jFYLhM7RCuNLd6Lazd4o=";
  };

  parquet-testing = fetchApachePinned {
    repo = "parquet-testing";
    rev = "c7cf1374cf284c0c73024cd1437becea75558bf8";
    hash = "sha256-DThjyZ34LajHwXZy1IhYKUGUG/ejQ9WvBNuI8eUKmSs=";
  };

  # Arrow release being packaged; also used to build the source revision name.
  version = "19.0.1";
in
89stdenv.mkDerivation (finalAttrs: {
90 pname = "arrow-cpp";
91 inherit version;
92
93 src = fetchFromGitHub {
94 owner = "apache";
95 repo = "arrow";
96 rev = "apache-arrow-${version}";
97 hash = "sha256-toHwUIOZRpgR0K7pQtT5nqWpO9G7AuHYTcvA6UVg9lA=";
98 };
99
100 sourceRoot = "${finalAttrs.src.name}/cpp";
101
102 patches = [
103 (fetchpatch2 {
104 name = "protobuf-30-compat.patch";
105 url = "https://github.com/apache/arrow/pull/46136.patch";
106 hash = "sha256-WTpe/eT3himlCHN/R78w1sF0HG859mE2ZN70U+9N8Ag=";
107 stripLen = 1;
108 })
109 ];
110
111 # versions are all taken from
112 # https://github.com/apache/arrow/blob/apache-arrow-${version}/cpp/thirdparty/versions.txt
113
114 # jemalloc: arrow uses a custom prefix to prevent default allocator symbol
115 # collisions as well as custom build flags
116 ${if enableJemalloc then "ARROW_JEMALLOC_URL" else null} = fetchurl {
117 url = "https://github.com/jemalloc/jemalloc/releases/download/5.3.0/jemalloc-5.3.0.tar.bz2";
118 hash = "sha256-LbgtHnEZ3z5xt2QCGbbf6EeJvAU3mDw7esT3GJrs/qo=";
119 };
120
121 # mimalloc: arrow uses custom build flags for mimalloc
122 ARROW_MIMALLOC_URL = fetchFromGitHub {
123 owner = "microsoft";
124 repo = "mimalloc";
125 rev = "v2.0.6";
126 hash = "sha256-u2ITXABBN/dwU+mCIbL3tN1f4c17aBuSdNTV+Adtohc=";
127 };
128
129 ARROW_XSIMD_URL = fetchFromGitHub {
130 owner = "xtensor-stack";
131 repo = "xsimd";
132 rev = "13.0.0";
133 hash = "sha256-qElJYW5QDj3s59L3NgZj5zkhnUMzIP2mBa1sPks3/CE=";
134 };
135
136 ARROW_SUBSTRAIT_URL = fetchFromGitHub {
137 owner = "substrait-io";
138 repo = "substrait";
139 rev = "v0.44.0";
140 hash = "sha256-V739IFTGPtbGPlxcOi8sAaYSDhNUEpITvN9IqdPReug=";
141 };
142
143 nativeBuildInputs = [
144 cmake
145 pkg-config
146 ninja
147 autoconf # for vendored jemalloc
148 flatbuffers
149 ]
150 ++ lib.optional stdenv.hostPlatform.isDarwin fixDarwinDylibNames;
151 buildInputs = [
152 boost
153 brotli
154 bzip2
155 flatbuffers
156 gflags
157 glog
158 gtest
159 libbacktrace
160 lz4
161 nlohmann_json # alternative JSON parser to rapidjson
162 protobuf # substrait requires protobuf
163 rapidjson
164 re2
165 snappy
166 thrift
167 utf8proc
168 zlib
169 zstd
170 ]
171 ++ lib.optionals enableFlight [
172 grpc
173 openssl
174 protobuf
175 sqlite
176 ]
177 ++ lib.optionals enableS3 [
178 aws-sdk-cpp-arrow
179 openssl
180 ]
181 ++ lib.optionals enableGcs [
182 crc32c
183 curl
184 google-cloud-cpp
185 grpc
186 nlohmann_json
187 ];
188
189 preConfigure = ''
190 patchShebangs build-support/
191 substituteInPlace "src/arrow/vendored/datetime/tz.cpp" \
192 --replace-fail 'discover_tz_dir();' '"${tzdata}/share/zoneinfo";'
193 '';
194
195 cmakeFlags = [
196 "-DCMAKE_FIND_PACKAGE_PREFER_CONFIG=ON"
197 "-DARROW_BUILD_SHARED=${if enableShared then "ON" else "OFF"}"
198 "-DARROW_BUILD_STATIC=${if enableShared then "OFF" else "ON"}"
199 "-DARROW_BUILD_TESTS=${if enableShared then "ON" else "OFF"}"
200 "-DARROW_BUILD_INTEGRATION=ON"
201 "-DARROW_BUILD_UTILITIES=ON"
202 "-DARROW_EXTRA_ERROR_CONTEXT=ON"
203 "-DARROW_VERBOSE_THIRDPARTY_BUILD=ON"
204 "-DARROW_DEPENDENCY_SOURCE=SYSTEM"
205 "-Dxsimd_SOURCE=AUTO"
206 "-DARROW_DEPENDENCY_USE_SHARED=${if enableShared then "ON" else "OFF"}"
207 "-DARROW_COMPUTE=ON"
208 "-DARROW_CSV=ON"
209 "-DARROW_DATASET=ON"
210 "-DARROW_FILESYSTEM=ON"
211 "-DARROW_FLIGHT_SQL=${if enableFlight then "ON" else "OFF"}"
212 "-DARROW_HDFS=ON"
213 "-DARROW_IPC=ON"
214 "-DARROW_JEMALLOC=${if enableJemalloc then "ON" else "OFF"}"
215 "-DARROW_JSON=ON"
216 "-DARROW_USE_GLOG=ON"
217 "-DARROW_WITH_BACKTRACE=ON"
218 "-DARROW_WITH_BROTLI=ON"
219 "-DARROW_WITH_BZ2=ON"
220 "-DARROW_WITH_LZ4=ON"
221 "-DARROW_WITH_NLOHMANN_JSON=ON"
222 "-DARROW_WITH_SNAPPY=ON"
223 "-DARROW_WITH_UTF8PROC=ON"
224 "-DARROW_WITH_ZLIB=ON"
225 "-DARROW_WITH_ZSTD=ON"
226 "-DARROW_MIMALLOC=ON"
227 "-DARROW_SUBSTRAIT=ON"
228 "-DARROW_FLIGHT=${if enableFlight then "ON" else "OFF"}"
229 "-DARROW_FLIGHT_TESTING=${if enableFlight then "ON" else "OFF"}"
230 "-DARROW_S3=${if enableS3 then "ON" else "OFF"}"
231 "-DARROW_GCS=${if enableGcs then "ON" else "OFF"}"
232 # Parquet options:
233 "-DARROW_PARQUET=ON"
234 "-DPARQUET_BUILD_EXECUTABLES=ON"
235 "-DPARQUET_REQUIRE_ENCRYPTION=ON"
236 ]
237 ++ lib.optionals (!enableShared) [ "-DARROW_TEST_LINKAGE=static" ]
238 ++ lib.optionals stdenv.hostPlatform.isDarwin [
239 "-DCMAKE_INSTALL_RPATH=@loader_path/../lib" # needed for tools executables
240 ]
241 ++ lib.optionals (!stdenv.hostPlatform.isx86_64) [ "-DARROW_USE_SIMD=OFF" ]
242 ++ lib.optionals enableS3 [
243 "-DAWSSDK_CORE_HEADER_FILE=${aws-sdk-cpp-arrow}/include/aws/core/Aws.h"
244 ];
245
246 doInstallCheck = true;
247 ARROW_TEST_DATA = lib.optionalString finalAttrs.doInstallCheck "${arrow-testing}/data";
248 PARQUET_TEST_DATA = lib.optionalString finalAttrs.doInstallCheck "${parquet-testing}/data";
249 GTEST_FILTER =
250 let
251 # Upstream Issue: https://issues.apache.org/jira/browse/ARROW-11398
252 filteredTests =
253 lib.optionals stdenv.hostPlatform.isAarch64 [
254 "TestFilterKernelWithNumeric/3.CompareArrayAndFilterRandomNumeric"
255 "TestFilterKernelWithNumeric/7.CompareArrayAndFilterRandomNumeric"
256 "TestCompareKernel.PrimitiveRandomTests"
257 ]
258 ++ lib.optionals enableS3 [
259 "S3OptionsTest.FromUri"
260 "S3RegionResolutionTest.NonExistentBucket"
261 "S3RegionResolutionTest.PublicBucket"
262 "S3RegionResolutionTest.RestrictedBucket"
263 "TestMinioServer.Connect"
264 "TestS3FS.*"
265 "TestS3FSGeneric.*"
266 ]
267 ++ lib.optionals stdenv.hostPlatform.isDarwin [
268 # TODO: revisit at 12.0.0 or when
269 # https://github.com/apache/arrow/commit/295c6644ca6b67c95a662410b2c7faea0920c989
270 # is available, see
271 # https://github.com/apache/arrow/pull/15288#discussion_r1071244661
272 "ExecPlanExecution.StressSourceSinkStopped"
273 ];
274 in
275 lib.optionalString finalAttrs.doInstallCheck "-${lib.concatStringsSep ":" filteredTests}";
276
277 __darwinAllowLocalNetworking = true;
278
279 nativeInstallCheckInputs = [
280 perl
281 which
282 sqlite
283 ]
284 ++ lib.optionals enableS3 [ minio ]
285 ++ lib.optionals enableFlight [ python3 ];
286
287 installCheckPhase =
288 let
289 disabledTests = [
290 # flaky
291 "arrow-flight-test"
292 # requires networking
293 "arrow-gcsfs-test"
294 "arrow-flight-integration-test"
295 ];
296 in
297 ''
298 runHook preInstallCheck
299
300 ctest -L unittest --exclude-regex '^(${lib.concatStringsSep "|" disabledTests})$'
301
302 runHook postInstallCheck
303 '';
304
305 meta = with lib; {
306 description = "Cross-language development platform for in-memory data";
307 homepage = "https://arrow.apache.org/docs/cpp/";
308 license = licenses.asl20;
309 platforms = platforms.unix;
310 maintainers = with maintainers; [
311 tobim
312 veprbl
313 cpcloud
314 ];
315 pkgConfigModules = [
316 "arrow"
317 "arrow-acero"
318 "arrow-compute"
319 "arrow-csv"
320 "arrow-dataset"
321 "arrow-filesystem"
322 "arrow-flight"
323 "arrow-flight-sql"
324 "arrow-flight-testing"
325 "arrow-json"
326 "arrow-substrait"
327 "arrow-testing"
328 "parquet"
329 ];
330 };
331 passthru = {
332 inherit
333 enableFlight
334 enableJemalloc
335 enableS3
336 enableGcs
337 ;
338 tests.pkg-config = testers.testMetaPkgConfig finalAttrs.finalPackage;
339 };
340})