nixpkgs mirror (for testing)
github.com/NixOS/nixpkgs
nix
# This is arrow-cpp < 20, used as a workaround for
# Ceph not supporting >= 20 yet, taken from nixpkgs commit
# 97ae53798f6a7c7c3c259ad8c2cbcede6ca34b2a~
# This should be removed entirely once upstream bug
# https://tracker.ceph.com/issues/71269
# is fixed.
{
  stdenv,
  lib,
  fetchurl,
  fetchpatch2,
  fetchFromGitHub,
  fixDarwinDylibNames,
  autoconf,
  aws-sdk-cpp,
  # Defaults to aws-sdk-cpp overridden with an explicit `apis` list.
  aws-sdk-cpp-arrow ? aws-sdk-cpp.override {
    apis = [
      "cognito-identity"
      "config"
      "identity-management"
      "s3"
      "sts"
      "transfer"
    ];
  },
  boost,
  brotli,
  bzip2,
  cmake,
  crc32c,
  curl,
  flatbuffers,
  gflags,
  glog,
  google-cloud-cpp,
  grpc,
  gtest,
  libbacktrace,
  lz4,
  minio,
  ninja,
  nlohmann_json,
  openssl,
  perl,
  pkg-config,
  protobuf_32,
  python3,
  rapidjson,
  re2,
  snappy,
  sqlite,
  thrift,
  tzdata,
  utf8proc,
  which,
  zlib,
  zstd,
  testers,
  # On by default unless the host platform is static.
  enableShared ? !stdenv.hostPlatform.isStatic,
  # On by default only when build and host platform are the same.
  enableFlight ? stdenv.buildPlatform == stdenv.hostPlatform,
  # Off by default on Darwin, AArch64 and RISC-V 64. On RISC-V the failure is:
  #   configure: error: cannot determine number of significant virtual address bits
  enableJemalloc ? !(
    stdenv.hostPlatform.isDarwin || stdenv.hostPlatform.isAarch64 || stdenv.hostPlatform.isRiscV64
  ),
  enableS3 ? true,
  # Off by default on Darwin, and on RISC-V 64 because google-cloud-cpp fails
  # to build there.
  enableGcs ? !(stdenv.hostPlatform.isDarwin || stdenv.hostPlatform.isRiscV64),
}:
69
let
  version = "19.0.1";

  # Fetch a repository from the `apache` GitHub organisation at a pinned
  # revision; the store name is set to the repository name.
  fetchApacheTestData =
    {
      repo,
      rev,
      hash,
    }:
    fetchFromGitHub {
      name = repo;
      owner = "apache";
      inherit repo rev hash;
    };

  arrow-testing = fetchApacheTestData {
    repo = "arrow-testing";
    rev = "4d209492d514c2d3cb2d392681b9aa00e6d8da1c";
    hash = "sha256-IkiCbuy0bWyClPZ4ZEdkEP7jFYLhM7RCuNLd6Lazd4o=";
  };

  parquet-testing = fetchApacheTestData {
    repo = "parquet-testing";
    rev = "c7cf1374cf284c0c73024cd1437becea75558bf8";
    hash = "sha256-DThjyZ34LajHwXZy1IhYKUGUG/ejQ9WvBNuI8eUKmSs=";
  };
in
stdenv.mkDerivation (finalAttrs: {
  pname = "arrow-cpp";
  inherit version;

  src = fetchFromGitHub {
    owner = "apache";
    repo = "arrow";
    rev = "apache-arrow-${version}";
    hash = "sha256-toHwUIOZRpgR0K7pQtT5nqWpO9G7AuHYTcvA6UVg9lA=";
  };

  # Build from the `cpp/` subdirectory of the fetched source tree.
  sourceRoot = "${finalAttrs.src.name}/cpp";

  patches = [
    (fetchpatch2 {
      name = "protobuf-30-compat.patch";
      url = "https://github.com/apache/arrow/pull/46136.patch";
      hash = "sha256-WTpe/eT3himlCHN/R78w1sF0HG859mE2ZN70U+9N8Ag=";
      stripLen = 1;
    })
    (fetchpatch2 {
      name = "cmake-fix.patch";
      url = "https://github.com/apache/arrow/commit/48c0bbbd4a2eedcca518caeb7f7547c7988dc740.patch?full_index=1";
      hash = "sha256-i/vZy/61VYP+mo1AxfoiBSjTip04vhFOh3hGjHCJy6g=";
      stripLen = 1; # applying patch from within `cpp/` subdirectory
    })
  ];

  # versions are all taken from
  # https://github.com/apache/arrow/blob/apache-arrow-${version}/cpp/thirdparty/versions.txt

  # jemalloc: arrow uses a custom prefix to prevent default allocator symbol
  # collisions as well as custom build flags.
  # The attribute name evaluates to null when jemalloc is disabled, in which
  # case Nix leaves the attribute out of the set altogether.
  ${if enableJemalloc then "ARROW_JEMALLOC_URL" else null} = fetchurl {
    url = "https://github.com/jemalloc/jemalloc/releases/download/5.3.0/jemalloc-5.3.0.tar.bz2";
    hash = "sha256-LbgtHnEZ3z5xt2QCGbbf6EeJvAU3mDw7esT3GJrs/qo=";
  };

  # mimalloc: arrow uses custom build flags for mimalloc
  ARROW_MIMALLOC_URL = fetchFromGitHub {
    owner = "microsoft";
    repo = "mimalloc";
    rev = "v2.0.6";
    hash = "sha256-u2ITXABBN/dwU+mCIbL3tN1f4c17aBuSdNTV+Adtohc=";
  };

  ARROW_XSIMD_URL = fetchFromGitHub {
    owner = "xtensor-stack";
    repo = "xsimd";
    rev = "13.0.0";
    hash = "sha256-qElJYW5QDj3s59L3NgZj5zkhnUMzIP2mBa1sPks3/CE=";
  };

  ARROW_SUBSTRAIT_URL = fetchFromGitHub {
    owner = "substrait-io";
    repo = "substrait";
    rev = "v0.44.0";
    hash = "sha256-V739IFTGPtbGPlxcOi8sAaYSDhNUEpITvN9IqdPReug=";
  };

  nativeBuildInputs = [
    cmake
    pkg-config
    ninja
    autoconf # for vendored jemalloc
    flatbuffers
  ]
  ++ lib.optional stdenv.hostPlatform.isDarwin fixDarwinDylibNames;

  buildInputs = [
    boost
    brotli
    bzip2
    flatbuffers
    gflags
    glog
    gtest
    libbacktrace
    lz4
    nlohmann_json # alternative JSON parser to rapidjson
    protobuf_32 # substrait requires protobuf
    rapidjson
    re2
    snappy
    thrift
    utf8proc
    zlib
    zstd
  ]
  ++ lib.optionals enableFlight [
    grpc
    openssl
    sqlite
  ]
  ++ lib.optionals enableS3 [
    aws-sdk-cpp-arrow
    openssl
  ]
  ++ lib.optionals enableGcs [
    crc32c
    curl
    google-cloud-cpp
    grpc
    nlohmann_json
  ];

  # Replace the call to discover_tz_dir() in the vendored date library with
  # the fixed zoneinfo path from the tzdata package.
  preConfigure = ''
    patchShebangs build-support/
    substituteInPlace "src/arrow/vendored/datetime/tz.cpp" \
      --replace-fail 'discover_tz_dir();' '"${tzdata}/share/zoneinfo";'
  '';

  cmakeFlags = [
    "-DCMAKE_FIND_PACKAGE_PREFER_CONFIG=ON"
    "-DARROW_BUILD_SHARED=${if enableShared then "ON" else "OFF"}"
    "-DARROW_BUILD_STATIC=${if enableShared then "OFF" else "ON"}"
    # Tests are only built for shared builds.
    "-DARROW_BUILD_TESTS=${if enableShared then "ON" else "OFF"}"
    "-DARROW_BUILD_INTEGRATION=ON"
    "-DARROW_BUILD_UTILITIES=ON"
    "-DARROW_EXTRA_ERROR_CONTEXT=ON"
    "-DARROW_VERBOSE_THIRDPARTY_BUILD=ON"
    "-DARROW_DEPENDENCY_SOURCE=SYSTEM"
    "-Dxsimd_SOURCE=AUTO"
    "-DARROW_DEPENDENCY_USE_SHARED=${if enableShared then "ON" else "OFF"}"
    "-DARROW_COMPUTE=ON"
    "-DARROW_CSV=ON"
    "-DARROW_DATASET=ON"
    "-DARROW_FILESYSTEM=ON"
    "-DARROW_FLIGHT_SQL=${if enableFlight then "ON" else "OFF"}"
    "-DARROW_HDFS=ON"
    "-DARROW_IPC=ON"
    "-DARROW_JEMALLOC=${if enableJemalloc then "ON" else "OFF"}"
    "-DARROW_JSON=ON"
    "-DARROW_USE_GLOG=ON"
    "-DARROW_WITH_BACKTRACE=ON"
    "-DARROW_WITH_BROTLI=ON"
    "-DARROW_WITH_BZ2=ON"
    "-DARROW_WITH_LZ4=ON"
    "-DARROW_WITH_NLOHMANN_JSON=ON"
    "-DARROW_WITH_SNAPPY=ON"
    "-DARROW_WITH_UTF8PROC=ON"
    "-DARROW_WITH_ZLIB=ON"
    "-DARROW_WITH_ZSTD=ON"
    "-DARROW_MIMALLOC=ON"
    "-DARROW_SUBSTRAIT=ON"
    "-DARROW_FLIGHT=${if enableFlight then "ON" else "OFF"}"
    "-DARROW_FLIGHT_TESTING=${if enableFlight then "ON" else "OFF"}"
    "-DARROW_S3=${if enableS3 then "ON" else "OFF"}"
    "-DARROW_GCS=${if enableGcs then "ON" else "OFF"}"
    # Parquet options:
    "-DARROW_PARQUET=ON"
    "-DPARQUET_BUILD_EXECUTABLES=ON"
    "-DPARQUET_REQUIRE_ENCRYPTION=ON"
  ]
  ++ lib.optionals (!enableShared) [ "-DARROW_TEST_LINKAGE=static" ]
  ++ lib.optionals stdenv.hostPlatform.isDarwin [
    "-DCMAKE_INSTALL_RPATH=@loader_path/../lib" # needed for tools executables
  ]
  # NOTE(review): confirm that Arrow 19's CMake still honours ARROW_USE_SIMD;
  # newer releases document ARROW_SIMD_LEVEL instead.
  ++ lib.optionals (!stdenv.hostPlatform.isx86_64) [ "-DARROW_USE_SIMD=OFF" ]
  ++ lib.optionals enableS3 [
    "-DAWSSDK_CORE_HEADER_FILE=${aws-sdk-cpp-arrow}/include/aws/core/Aws.h"
  ];

  doInstallCheck = true;
  # The three test-related variables below are empty strings when
  # doInstallCheck is overridden to false.
  ARROW_TEST_DATA = lib.optionalString finalAttrs.doInstallCheck "${arrow-testing}/data";
  PARQUET_TEST_DATA = lib.optionalString finalAttrs.doInstallCheck "${parquet-testing}/data";
  # A leading "-" followed by a ":"-separated list, i.e. a list of tests for
  # googletest to exclude.
  GTEST_FILTER =
    let
      # Upstream Issue: https://issues.apache.org/jira/browse/ARROW-11398
      filteredTests =
        lib.optionals stdenv.hostPlatform.isAarch64 [
          "TestFilterKernelWithNumeric/3.CompareArrayAndFilterRandomNumeric"
          "TestFilterKernelWithNumeric/7.CompareArrayAndFilterRandomNumeric"
          "TestCompareKernel.PrimitiveRandomTests"
        ]
        ++ lib.optionals enableS3 [
          "S3OptionsTest.FromUri"
          "S3RegionResolutionTest.NonExistentBucket"
          "S3RegionResolutionTest.PublicBucket"
          "S3RegionResolutionTest.RestrictedBucket"
          "TestMinioServer.Connect"
          "TestS3FS.*"
          "TestS3FSGeneric.*"
        ]
        ++ lib.optionals stdenv.hostPlatform.isDarwin [
          # TODO: this was meant to be revisited at 12.0.0 (this package is
          # already past that version) or when
          # https://github.com/apache/arrow/commit/295c6644ca6b67c95a662410b2c7faea0920c989
          # is available, see
          # https://github.com/apache/arrow/pull/15288#discussion_r1071244661
          "ExecPlanExecution.StressSourceSinkStopped"
        ];
    in
    lib.optionalString finalAttrs.doInstallCheck "-${lib.concatStringsSep ":" filteredTests}";

  __darwinAllowLocalNetworking = true;

  nativeInstallCheckInputs = [
    perl
    which
    sqlite
  ]
  ++ lib.optionals enableS3 [ minio ]
  ++ lib.optionals enableFlight [ python3 ];

  # Run the ctest suites labelled `unittest`, skipping the test executables
  # whose full name matches one of `disabledTests`.
  installCheckPhase =
    let
      disabledTests = [
        # flaky
        "arrow-flight-test"
        # requires networking
        "arrow-gcsfs-test"
        "arrow-flight-integration-test"
      ];
    in
    ''
      runHook preInstallCheck

      ctest -L unittest --exclude-regex '^(${lib.concatStringsSep "|" disabledTests})$'

      runHook postInstallCheck
    '';

  meta = {
    description = "Cross-language development platform for in-memory data";
    homepage = "https://arrow.apache.org/docs/cpp/";
    license = lib.licenses.asl20;
    platforms = lib.platforms.unix;
    maintainers = with lib.maintainers; [
      tobim
      veprbl
      cpcloud
    ];
    pkgConfigModules = [
      "arrow"
      "arrow-acero"
      "arrow-compute"
      "arrow-csv"
      "arrow-dataset"
      "arrow-filesystem"
      "arrow-json"
      "arrow-substrait"
      "arrow-testing"
      "parquet"
    ]
    # The Flight modules only exist when Flight is built (ARROW_FLIGHT,
    # ARROW_FLIGHT_SQL and ARROW_FLIGHT_TESTING all follow enableFlight).
    # Listing them unconditionally makes `passthru.tests.pkg-config` look for
    # .pc files that were never installed when enableFlight is false.
    ++ lib.optionals enableFlight [
      "arrow-flight"
      "arrow-flight-sql"
      "arrow-flight-testing"
    ];
  };

  passthru = {
    inherit
      enableFlight
      enableJemalloc
      enableS3
      enableGcs
      ;
    tests.pkg-config = testers.testMetaPkgConfig finalAttrs.finalPackage;
  };
})