{
 # Build infrastructure and fetchers.
 stdenv,
 lib,
 fetchurl,
 fetchFromGitHub,
 fixDarwinDylibNames,

 # Packages consumed by the derivation.
 apache-orc,
 autoconf,
 aws-sdk-cpp,
 # aws-sdk-cpp overridden so that only the listed `apis` are requested.
 # Being an ordinary argument with a default, callers can substitute their
 # own SDK build.
 aws-sdk-cpp-arrow ? aws-sdk-cpp.override {
   apis = [
     "cognito-identity"
     "config"
     "identity-management"
     "s3"
     "sts"
     "transfer"
   ];
 },
 boost,
 brotli,
 bzip2,
 cmake,
 crc32c,
 curl,
 flatbuffers,
 gflags,
 glog,
 google-cloud-cpp,
 grpc,
 gtest,
 libbacktrace,
 lz4,
 minio,
 ninja,
 nlohmann_json,
 openssl,
 perl,
 pkg-config,
 protobuf_31,
 python3,
 rapidjson,
 re2,
 snappy,
 sqlite,
 thrift,
 tzdata,
 utf8proc,
 which,
 zlib,
 zstd,
 testers,

 # Feature toggles.

 # Shared libraries by default, unless the host platform is static.
 enableShared ? !stdenv.hostPlatform.isStatic,
 # Flight defaults to on only when build and host platform are the same.
 enableFlight ? stdenv.buildPlatform == stdenv.hostPlatform,
 # jemalloc defaults to off on Darwin, on AArch64 and also on RiscV, where
 # its configure step fails with:
 #   configure: error: cannot determine number of significant virtual address bits
 enableJemalloc ?
   !(
     stdenv.hostPlatform.isDarwin || stdenv.hostPlatform.isAarch64 || stdenv.hostPlatform.isRiscV64
   ),
 enableS3 ? true,
 # GCS defaults to off on Darwin, and on RiscV because google-cloud-cpp
 # fails to build there.
 enableGcs ? !(stdenv.hostPlatform.isDarwin || stdenv.hostPlatform.isRiscV64),
}:
63
let
 # Fetch a repository owned by "apache" on GitHub at a fixed revision; the
 # fetched source is named after the repository.
 fetchApacheTestRepo =
   repo: rev: hash:
   fetchFromGitHub {
     name = repo;
     owner = "apache";
     inherit repo rev hash;
   };

 # Pinned test-data checkouts; their `data` directories are handed to the
 # test suite through ARROW_TEST_DATA / PARQUET_TEST_DATA.
 arrow-testing =
   fetchApacheTestRepo "arrow-testing" "d2a13712303498963395318a4eb42872e66aead7"
     "sha256-c8FL37kG0uo7o0Zp71WjCl7FD5BnVgqUCCXXX9gI0lg=";

 parquet-testing =
   fetchApacheTestRepo "parquet-testing" "18d17540097fca7c40be3d42c167e6bfad90763c"
     "sha256-gKEQc2RKpVp39RmuZbIeIXAwiAXDHGnLXF6VQuJtnRA=";

 # Arrow release being packaged; also used to build the source tag name.
 version = "20.0.0";
in
83stdenv.mkDerivation (finalAttrs: {
84 pname = "arrow-cpp";
85 inherit version;
86
87 src = fetchFromGitHub {
88 owner = "apache";
89 repo = "arrow";
90 rev = "apache-arrow-${version}";
91 hash = "sha256-JFPdKraCU+xRkBTAHyY4QGnBVlOjQ1P5+gq9uxyqJtk=";
92 };
93
94 sourceRoot = "${finalAttrs.src.name}/cpp";
95
96 # versions are all taken from
97 # https://github.com/apache/arrow/blob/apache-arrow-${version}/cpp/thirdparty/versions.txt
98
99 # jemalloc: arrow uses a custom prefix to prevent default allocator symbol
100 # collisions as well as custom build flags
101 ${if enableJemalloc then "ARROW_JEMALLOC_URL" else null} = fetchurl {
102 url = "https://github.com/jemalloc/jemalloc/releases/download/5.3.0/jemalloc-5.3.0.tar.bz2";
103 hash = "sha256-LbgtHnEZ3z5xt2QCGbbf6EeJvAU3mDw7esT3GJrs/qo=";
104 };
105
106 # mimalloc: arrow uses custom build flags for mimalloc
107 ARROW_MIMALLOC_URL = fetchFromGitHub {
108 owner = "microsoft";
109 repo = "mimalloc";
110 rev = "v2.0.6";
111 hash = "sha256-u2ITXABBN/dwU+mCIbL3tN1f4c17aBuSdNTV+Adtohc=";
112 };
113
114 ARROW_XSIMD_URL = fetchFromGitHub {
115 owner = "xtensor-stack";
116 repo = "xsimd";
117 rev = "13.0.0";
118 hash = "sha256-qElJYW5QDj3s59L3NgZj5zkhnUMzIP2mBa1sPks3/CE=";
119 };
120
121 ARROW_SUBSTRAIT_URL = fetchFromGitHub {
122 owner = "substrait-io";
123 repo = "substrait";
124 rev = "v0.44.0";
125 hash = "sha256-V739IFTGPtbGPlxcOi8sAaYSDhNUEpITvN9IqdPReug=";
126 };
127
128 nativeBuildInputs = [
129 cmake
130 pkg-config
131 ninja
132 autoconf # for vendored jemalloc
133 flatbuffers
134 ]
135 ++ lib.optional stdenv.hostPlatform.isDarwin fixDarwinDylibNames;
136 buildInputs = [
137 apache-orc
138 boost
139 brotli
140 bzip2
141 flatbuffers
142 gflags
143 glog
144 gtest
145 libbacktrace
146 lz4
147 nlohmann_json # alternative JSON parser to rapidjson
148 protobuf_31 # substrait requires protobuf
149 rapidjson
150 re2
151 snappy
152 thrift
153 utf8proc
154 zlib
155 zstd
156 ]
157 ++ lib.optionals enableFlight [
158 grpc
159 openssl
160 protobuf_31
161 sqlite
162 ]
163 ++ lib.optionals enableS3 [
164 aws-sdk-cpp-arrow
165 openssl
166 ]
167 ++ lib.optionals enableGcs [
168 crc32c
169 curl
170 google-cloud-cpp
171 grpc
172 nlohmann_json
173 ];
174
175 # apache-orc looks for things in caps
176 env = {
177 LZ4_ROOT = lz4;
178 ZSTD_ROOT = zstd.dev;
179 };
180
181 # fails tests on glibc with this enabled
182 hardeningDisable = [ "glibcxxassertions" ];
183
184 preConfigure = ''
185 patchShebangs build-support/
186 substituteInPlace "src/arrow/vendored/datetime/tz.cpp" \
187 --replace-fail 'discover_tz_dir();' '"${tzdata}/share/zoneinfo";'
188 '';
189
190 cmakeFlags = [
191 "-DCMAKE_FIND_PACKAGE_PREFER_CONFIG=ON"
192 "-DARROW_BUILD_SHARED=${if enableShared then "ON" else "OFF"}"
193 "-DARROW_BUILD_STATIC=${if enableShared then "OFF" else "ON"}"
194 "-DARROW_BUILD_TESTS=${if enableShared then "ON" else "OFF"}"
195 "-DARROW_BUILD_INTEGRATION=ON"
196 "-DARROW_BUILD_UTILITIES=ON"
197 "-DARROW_EXTRA_ERROR_CONTEXT=ON"
198 "-DARROW_VERBOSE_THIRDPARTY_BUILD=ON"
199 "-DARROW_DEPENDENCY_SOURCE=SYSTEM"
200 "-Dxsimd_SOURCE=AUTO"
201 "-DARROW_DEPENDENCY_USE_SHARED=${if enableShared then "ON" else "OFF"}"
202 "-DARROW_COMPUTE=ON"
203 "-DARROW_CSV=ON"
204 "-DARROW_DATASET=ON"
205 "-DARROW_FILESYSTEM=ON"
206 "-DARROW_FLIGHT_SQL=${if enableFlight then "ON" else "OFF"}"
207 "-DARROW_HDFS=ON"
208 "-DARROW_IPC=ON"
209 "-DARROW_JEMALLOC=${if enableJemalloc then "ON" else "OFF"}"
210 "-DARROW_JSON=ON"
211 "-DARROW_USE_GLOG=ON"
212 "-DARROW_WITH_BACKTRACE=ON"
213 "-DARROW_WITH_BROTLI=ON"
214 "-DARROW_WITH_BZ2=ON"
215 "-DARROW_WITH_LZ4=ON"
216 "-DARROW_WITH_NLOHMANN_JSON=ON"
217 "-DARROW_WITH_SNAPPY=ON"
218 "-DARROW_WITH_UTF8PROC=ON"
219 "-DARROW_WITH_ZLIB=ON"
220 "-DARROW_WITH_ZSTD=ON"
221 "-DARROW_MIMALLOC=ON"
222 "-DARROW_SUBSTRAIT=ON"
223 "-DARROW_FLIGHT=${if enableFlight then "ON" else "OFF"}"
224 "-DARROW_FLIGHT_TESTING=${if enableFlight then "ON" else "OFF"}"
225 "-DARROW_S3=${if enableS3 then "ON" else "OFF"}"
226 "-DARROW_GCS=${if enableGcs then "ON" else "OFF"}"
227 "-DARROW_ORC=ON"
228 # Parquet options:
229 "-DARROW_PARQUET=ON"
230 "-DPARQUET_BUILD_EXECUTABLES=ON"
231 "-DPARQUET_REQUIRE_ENCRYPTION=ON"
232 ]
233 ++ lib.optionals (!enableShared) [ "-DARROW_TEST_LINKAGE=static" ]
234 ++ lib.optionals stdenv.hostPlatform.isDarwin [
235 "-DCMAKE_INSTALL_RPATH=@loader_path/../lib" # needed for tools executables
236 ]
237 ++ lib.optionals (!stdenv.hostPlatform.isx86_64) [ "-DARROW_USE_SIMD=OFF" ]
238 ++ lib.optionals enableS3 [
239 "-DAWSSDK_CORE_HEADER_FILE=${aws-sdk-cpp-arrow}/include/aws/core/Aws.h"
240 ];
241
242 doInstallCheck = true;
243 ARROW_TEST_DATA = lib.optionalString finalAttrs.doInstallCheck "${arrow-testing}/data";
244 PARQUET_TEST_DATA = lib.optionalString finalAttrs.doInstallCheck "${parquet-testing}/data";
245 GTEST_FILTER =
246 let
247 # Upstream Issue: https://issues.apache.org/jira/browse/ARROW-11398
248 filteredTests =
249 lib.optionals stdenv.hostPlatform.isAarch64 [
250 "TestFilterKernelWithNumeric/3.CompareArrayAndFilterRandomNumeric"
251 "TestFilterKernelWithNumeric/7.CompareArrayAndFilterRandomNumeric"
252 "TestCompareKernel.PrimitiveRandomTests"
253 ]
254 ++ lib.optionals enableS3 [
255 "S3OptionsTest.FromUri"
256 "S3RegionResolutionTest.NonExistentBucket"
257 "S3RegionResolutionTest.PublicBucket"
258 "S3RegionResolutionTest.RestrictedBucket"
259 "TestMinioServer.Connect"
260 "TestS3FS.*"
261 "TestS3FSGeneric.*"
262 ]
263 ++ lib.optionals stdenv.hostPlatform.isDarwin [
264 # TODO: revisit at 12.0.0 or when
265 # https://github.com/apache/arrow/commit/295c6644ca6b67c95a662410b2c7faea0920c989
266 # is available, see
267 # https://github.com/apache/arrow/pull/15288#discussion_r1071244661
268 "ExecPlanExecution.StressSourceSinkStopped"
269 ];
270 in
271 lib.optionalString finalAttrs.doInstallCheck "-${lib.concatStringsSep ":" filteredTests}";
272
273 __darwinAllowLocalNetworking = true;
274
275 nativeInstallCheckInputs = [
276 perl
277 which
278 sqlite
279 ]
280 ++ lib.optionals enableS3 [ minio ]
281 ++ lib.optionals enableFlight [ python3 ];
282
283 installCheckPhase =
284 let
285 disabledTests = [
286 # flaky
287 "arrow-flight-test"
288 # requires networking
289 "arrow-gcsfs-test"
290 "arrow-flight-integration-test"
291 # File already exists in database: orc_proto.proto
292 "arrow-orc-adapter-test"
293 ];
294 in
295 ''
296 runHook preInstallCheck
297
298 ctest -L unittest --exclude-regex '^(${lib.concatStringsSep "|" disabledTests})$'
299
300 runHook postInstallCheck
301 '';
302
303 meta = with lib; {
304 description = "Cross-language development platform for in-memory data";
305 homepage = "https://arrow.apache.org/docs/cpp/";
306 license = licenses.asl20;
307 platforms = platforms.unix;
308 maintainers = with maintainers; [
309 tobim
310 veprbl
311 cpcloud
312 ];
313 pkgConfigModules = [
314 "arrow"
315 "arrow-acero"
316 "arrow-compute"
317 "arrow-csv"
318 "arrow-dataset"
319 "arrow-filesystem"
320 "arrow-flight"
321 "arrow-flight-sql"
322 "arrow-flight-testing"
323 "arrow-json"
324 "arrow-substrait"
325 "arrow-testing"
326 "parquet"
327 ];
328 };
329 passthru = {
330 inherit
331 enableFlight
332 enableJemalloc
333 enableS3
334 enableGcs
335 ;
336 tests.pkg-config = testers.testMetaPkgConfig finalAttrs.finalPackage;
337 };
338})