# nixpkgs mirror (for testing)
# github.com/NixOS/nixpkgs
# nix
# Inputs of the Arrow C++ package expression: the build helpers and libraries
# it consumes, followed by the feature toggles callers may override.
{
  stdenv,
  lib,
  fetchurl,
  fetchFromGitHub,
  fixDarwinDylibNames,
  abseil-cpp,
  autoconf,
  aws-sdk-cpp,
  boost,
  brotli,
  c-ares,
  cmake,
  crc32c,
  curl,
  flatbuffers,
  gflags,
  glog,
  google-cloud-cpp,
  grpc,
  gtest,
  jemalloc,
  libbacktrace,
  lz4,
  minio,
  ninja,
  nlohmann_json,
  openssl,
  perl,
  protobuf,
  python3,
  rapidjson,
  re2,
  snappy,
  sqlite,
  thrift,
  tzdata,
  utf8proc,
  which,
  zlib,
  zstd,

  # Shared libraries are the default unless the host platform is static.
  enableShared ? !stdenv.hostPlatform.isStatic,
  enableFlight ? true,
  # jemalloc is off by default only on aarch64-darwin.
  enableJemalloc ? !stdenv.hostPlatform.isAarch64 || !stdenv.hostPlatform.isDarwin,
  # boost/process is broken in 1.69 on darwin, but fixed in 1.70 and
  # non-existent in older versions; see
  # https://github.com/boostorg/process/issues/55
  # S3 is therefore off by default only for Darwin with 1.69 <= boost < 1.70.
  enableS3 ? !(
    stdenv.hostPlatform.isDarwin
    && lib.versionAtLeast boost.version "1.69"
    && lib.versionOlder boost.version "1.70"
  ),
  # google-cloud-cpp is not supported on darwin
  enableGcs ? !stdenv.hostPlatform.isDarwin,
}:

# Abort evaluation when S3 is enabled on Darwin together with a Boost version
# that is at least 1.69 but older than 1.70. On any other configuration the
# condition holds and evaluation continues.
assert lib.assertMsg (
  !(enableS3 && stdenv.hostPlatform.isDarwin)
  || lib.versionOlder boost.version "1.69"
  || lib.versionAtLeast boost.version "1.70"
) "S3 on Darwin requires Boost != 1.69";

let
  # Fetch a repository owned by the "apache" GitHub organisation at a pinned
  # revision; the caller supplies repo name, revision and content hash.
  fetchApacheRepo =
    {
      repo,
      rev,
      hash,
    }:
    fetchFromGitHub {
      owner = "apache";
      inherit repo rev hash;
    };

  # Pinned checkout of apache/arrow-testing.
  arrow-testing = fetchApacheRepo {
    repo = "arrow-testing";
    rev = "634739c664433cec366b4b9a81d1e1044a8c5eda";
    hash = "sha256-r1WVgJJsI7v485L6Qb+5i7kFO4Tvxyk1T0JBb4og6pg=";
  };

  # Pinned checkout of apache/parquet-testing.
  parquet-testing = fetchApacheRepo {
    repo = "parquet-testing";
    rev = "acd375eb86a81cd856476fca0f52ba6036a067ff";
    hash = "sha256-z/kmi+4dBO/dsVkJA4NgUoxl0pXi8RWIGvI8MGu/gcc=";
  };

in
# Apache Arrow C++ libraries and tools, configured through CMake against
# system-provided dependencies (ARROW_DEPENDENCY_SOURCE=SYSTEM). The unit
# tests are built and run with ctest during the install-check phase.
stdenv.mkDerivation rec {
  pname = "arrow-cpp";
  version = "8.0.0";

  src = fetchurl {
    url = "mirror://apache/arrow/arrow-${version}/apache-arrow-${version}.tar.gz";
    hash = "sha256-rZoFcFEXyYnBFrrprHBJL+AVBQ4bgPsOOP3ktdhjqqM=";
  };
  # Only the cpp/ subdirectory of the release tarball is built.
  sourceRoot = "apache-arrow-${version}/cpp";

  # A null attribute name makes Nix omit the binding, so ARROW_JEMALLOC_URL is
  # only defined when jemalloc support is enabled.
  ${if enableJemalloc then "ARROW_JEMALLOC_URL" else null} = jemalloc.src;

  # versions are all taken from
  # https://github.com/apache/arrow/blob/apache-arrow-8.0.0/cpp/thirdparty/versions.txt

  ARROW_MIMALLOC_URL = fetchFromGitHub {
    owner = "microsoft";
    repo = "mimalloc";
    rev = "v1.7.3";
    hash = "sha256-Ca877VitpWyKmZNHavqgewk/P+tyd2xHDNVqveKh87M=";
  };

  ARROW_XSIMD_URL = fetchFromGitHub {
    owner = "xtensor-stack";
    repo = "xsimd";
    rev = "7d1778c3b38d63db7cec7145d939f40bc5d859d1";
    hash = "sha256-89AysBUVnTdWyMPazeJegnQ6WEH90Ns7qQInZLMSXY4=";
  };

  ARROW_SUBSTRAIT_URL = fetchFromGitHub {
    owner = "substrait-io";
    repo = "substrait";
    rev = "e1b4c04a1b518912f4c4065b16a1b2c0ac8e14cf";
    hash = "sha256-56FSjDngsROSHLjMv+OYAIYqphEu3GzgIMHbgh/ZQw0=";
  };

  patches = [
    # patch to fix python-test
    ./darwin.patch
  ];

  nativeBuildInputs = [
    cmake
    ninja
    autoconf # for vendored jemalloc
    flatbuffers
  ] ++ lib.optional stdenv.hostPlatform.isDarwin fixDarwinDylibNames;

  # Each dependency is listed once: protobuf and nlohmann_json are always
  # present, so the Flight and GCS groups below do not repeat them, and
  # openssl is added a single time when either Flight or S3 is enabled.
  buildInputs = [
    boost
    brotli
    flatbuffers
    gflags
    glog
    gtest
    libbacktrace
    lz4
    nlohmann_json # alternative JSON parser to rapidjson
    protobuf # substrait requires protobuf (also used by Flight)
    rapidjson
    re2
    snappy
    thrift
    utf8proc
    zlib
    zstd
  ] ++ lib.optionals enableShared [
    python3.pkgs.python
    python3.pkgs.numpy
  ] ++ lib.optional enableFlight grpc
  ++ lib.optional enableS3 aws-sdk-cpp
  ++ lib.optional (enableFlight || enableS3) openssl
  ++ lib.optionals enableGcs [
    abseil-cpp
    crc32c
    curl
    google-cloud-cpp
  ];

  # Fix script interpreters and hard-code the time zone database location
  # instead of letting the vendored date library discover it at run time.
  preConfigure = ''
    patchShebangs build-support/
    substituteInPlace "src/arrow/vendored/datetime/tz.cpp" \
      --replace 'discover_tz_dir();' '"${tzdata}/share/zoneinfo";'
  '';

  cmakeFlags =
    let
      # Render a Nix boolean as the ON/OFF value used by the CMake options.
      onOff = enabled: if enabled then "ON" else "OFF";
    in
    [
      "-DCMAKE_FIND_PACKAGE_PREFER_CONFIG=ON"
      "-DARROW_BUILD_SHARED=${onOff enableShared}"
      "-DARROW_BUILD_STATIC=${onOff (!enableShared)}"
      "-DARROW_BUILD_TESTS=ON"
      "-DARROW_BUILD_INTEGRATION=ON"
      "-DARROW_BUILD_UTILITIES=ON"
      "-DARROW_EXTRA_ERROR_CONTEXT=ON"
      "-DARROW_VERBOSE_THIRDPARTY_BUILD=ON"
      "-DARROW_DEPENDENCY_SOURCE=SYSTEM"
      "-DThrift_SOURCE=AUTO" # search for Thrift using pkg-config (ThriftConfig.cmake requires OpenSSL and libevent)
      "-DARROW_DEPENDENCY_USE_SHARED=${onOff enableShared}"
      "-DARROW_COMPUTE=ON"
      "-DARROW_CSV=ON"
      "-DARROW_DATASET=ON"
      "-DARROW_ENGINE=ON"
      "-DARROW_FILESYSTEM=ON"
      "-DARROW_FLIGHT_SQL=${onOff enableFlight}"
      "-DARROW_HDFS=ON"
      "-DARROW_IPC=ON"
      "-DARROW_JEMALLOC=${onOff enableJemalloc}"
      "-DARROW_JSON=ON"
      "-DARROW_PLASMA=ON"
      # Disable Python for static mode because openblas is currently broken there.
      "-DARROW_PYTHON=${onOff enableShared}"
      "-DARROW_USE_GLOG=ON"
      "-DARROW_WITH_BACKTRACE=ON"
      "-DARROW_WITH_BROTLI=ON"
      "-DARROW_WITH_LZ4=ON"
      "-DARROW_WITH_NLOHMANN_JSON=ON"
      "-DARROW_WITH_SNAPPY=ON"
      "-DARROW_WITH_UTF8PROC=ON"
      "-DARROW_WITH_ZLIB=ON"
      "-DARROW_WITH_ZSTD=ON"
      "-DARROW_MIMALLOC=ON"
      # Parquet options:
      "-DARROW_PARQUET=ON"
      "-DARROW_SUBSTRAIT=ON"
      "-DPARQUET_BUILD_EXECUTABLES=ON"
      "-DARROW_FLIGHT=${onOff enableFlight}"
      "-DARROW_FLIGHT_TESTING=${onOff enableFlight}"
      "-DARROW_S3=${onOff enableS3}"
      "-DARROW_GCS=${onOff enableGcs}"
    ]
    ++ lib.optionals (!enableShared) [
      "-DARROW_TEST_LINKAGE=static"
    ]
    ++ lib.optionals stdenv.hostPlatform.isDarwin [
      "-DCMAKE_SKIP_BUILD_RPATH=OFF" # needed for tests
      "-DCMAKE_INSTALL_RPATH=@loader_path/../lib" # needed for tools executables
    ]
    ++ lib.optional (!stdenv.hostPlatform.isx86_64) "-DARROW_USE_SIMD=OFF"
    ++ lib.optional enableS3 "-DAWSSDK_CORE_HEADER_FILE=${aws-sdk-cpp}/include/aws/core/Aws.h";

  doInstallCheck = true;
  # The three variables below collapse to the empty string when the install
  # check is disabled.
  ARROW_TEST_DATA = lib.optionalString doInstallCheck "${arrow-testing}/data";
  PARQUET_TEST_DATA = lib.optionalString doInstallCheck "${parquet-testing}/data";
  # Colon-separated test patterns behind a leading "-", which GoogleTest
  # treats as an exclusion list.
  GTEST_FILTER =
    let
      # Upstream Issue: https://issues.apache.org/jira/browse/ARROW-11398
      filteredTests = lib.optionals stdenv.hostPlatform.isAarch64 [
        "TestFilterKernelWithNumeric/3.CompareArrayAndFilterRandomNumeric"
        "TestFilterKernelWithNumeric/7.CompareArrayAndFilterRandomNumeric"
        "TestCompareKernel.PrimitiveRandomTests"
      ] ++ lib.optionals enableS3 [
        "S3OptionsTest.FromUri"
        "S3RegionResolutionTest.NonExistentBucket"
        "S3RegionResolutionTest.PublicBucket"
        "S3RegionResolutionTest.RestrictedBucket"
        "TestMinioServer.Connect"
        "TestS3FS.*"
        "TestS3FSGeneric.*"
      ];
    in
    lib.optionalString doInstallCheck "-${lib.concatStringsSep ":" filteredTests}";
  installCheckInputs = [ perl which sqlite ] ++ lib.optional enableS3 minio;
  # Run every ctest target labelled "unittest" except the excluded ones.
  installCheckPhase =
    let
      excludedTests = lib.optionals stdenv.hostPlatform.isDarwin [
        # Some plasma tests need to be patched to use a shorter AF_UNIX socket
        # path on Darwin. See https://github.com/NixOS/nix/pull/1085
        "plasma-external-store-tests"
        "plasma-client-tests"
      ] ++ [ "arrow-gcsfs-test" ];
    in
    ''
      runHook preInstallCheck

      ctest -L unittest \
        --exclude-regex '^(${lib.concatStringsSep "|" excludedTests})$'

      runHook postInstallCheck
    '';

  meta = {
    # No leading article, per the Nixpkgs convention for descriptions.
    description = "Cross-language development platform for in-memory data";
    homepage = "https://arrow.apache.org/";
    license = lib.licenses.asl20;
    platforms = lib.platforms.unix;
    maintainers = with lib.maintainers; [ tobim veprbl cpcloud ];
  };
  # Expose the feature toggles so dependents can inspect how this was built.
  passthru = {
    inherit enableFlight enableJemalloc enableS3 enableGcs;
  };
}