1{ stdenv
2, lib
3, fetchurl
4, fetchFromGitHub
5, fixDarwinDylibNames
6, autoconf
7, aws-sdk-cpp
8, boost
9, brotli
10, c-ares
11, cmake
12, crc32c
13, curl
14, flatbuffers
15, gflags
16, glog
17, google-cloud-cpp
18, grpc
19, gtest
20, libbacktrace
21, lz4
22, minio
23, ninja
24, nlohmann_json
25, openssl
26, perl
27, protobuf
28, python3
29, rapidjson
30, re2
31, snappy
32, sqlite
33, thrift
34, tzdata
35, utf8proc
36, which
37, zlib
38, zstd
39, enableShared ? !stdenv.hostPlatform.isStatic
40, enableFlight ? true
41, enableJemalloc ? !stdenv.isDarwin
42 # boost/process is broken in 1.69 on darwin, but fixed in 1.70 and
43 # non-existent in older versions
44 # see https://github.com/boostorg/process/issues/55
45, enableS3 ? (!stdenv.isDarwin) || (lib.versionOlder boost.version "1.69" || lib.versionAtLeast boost.version "1.70")
46, enableGcs ? (!stdenv.isDarwin) && (lib.versionAtLeast grpc.cxxStandard "17") # google-cloud-cpp is not supported on darwin, needs to support C++17
47}:
48
49assert lib.asserts.assertMsg
50 ((enableS3 && stdenv.isDarwin) -> (lib.versionOlder boost.version "1.69" || lib.versionAtLeast boost.version "1.70"))
51 "S3 on Darwin requires Boost != 1.69";
52
53let
54 arrow-testing = fetchFromGitHub {
55 name = "arrow-testing";
56 owner = "apache";
57 repo = "arrow-testing";
58 rev = "47f7b56b25683202c1fd957668e13f2abafc0f12";
59 hash = "sha256-ZDznR+yi0hm5O1s9as8zq5nh1QxJ8kXCRwbNQlzXpnI=";
60 };
61
62 parquet-testing = fetchFromGitHub {
63 name = "parquet-testing";
64 owner = "apache";
65 repo = "parquet-testing";
66 rev = "b2e7cc755159196e3a068c8594f7acbaecfdaaac";
67 hash = "sha256-IFvGTOkaRSNgZOj8DziRj88yH5JRF+wgSDZ5N0GNvjk=";
68 };
69
70 aws-sdk-cpp-arrow = aws-sdk-cpp.override {
71 apis = [
72 "cognito-identity"
73 "config"
74 "identity-management"
75 "s3"
76 "sts"
77 "transfer"
78 ];
79 };
80
81in
82stdenv.mkDerivation rec {
83 pname = "arrow-cpp";
84 version = "12.0.0";
85
86 src = fetchurl {
87 url = "mirror://apache/arrow/arrow-${version}/apache-arrow-${version}.tar.gz";
88 hash = "sha256-3dg0eIJ3XlOvfQlloZArfY/NCgMP0U94PU+F6CE1LVI=";
89 };
90
91 sourceRoot = "apache-arrow-${version}/cpp";
92
93 # versions are all taken from
94 # https://github.com/apache/arrow/blob/apache-arrow-${version}/cpp/thirdparty/versions.txt
95
96 # jemalloc: arrow uses a custom prefix to prevent default allocator symbol
97 # collisions as well as custom build flags
98 ${if enableJemalloc then "ARROW_JEMALLOC_URL" else null} = fetchurl {
99 url = "https://github.com/jemalloc/jemalloc/releases/download/5.3.0/jemalloc-5.3.0.tar.bz2";
100 hash = "sha256-LbgtHnEZ3z5xt2QCGbbf6EeJvAU3mDw7esT3GJrs/qo=";
101 };
102
103 # mimalloc: arrow uses custom build flags for mimalloc
104 ARROW_MIMALLOC_URL = fetchFromGitHub {
105 owner = "microsoft";
106 repo = "mimalloc";
107 rev = "v2.0.6";
108 hash = "sha256-u2ITXABBN/dwU+mCIbL3tN1f4c17aBuSdNTV+Adtohc=";
109 };
110
111 ARROW_XSIMD_URL = fetchFromGitHub {
112 owner = "xtensor-stack";
113 repo = "xsimd";
114 rev = "9.0.1";
115 hash = "sha256-onALN6agtrHWigtFlCeefD9CiRZI4Y690XTzy2UDnrk=";
116 };
117
118 ARROW_SUBSTRAIT_URL = fetchFromGitHub {
119 owner = "substrait-io";
120 repo = "substrait";
121 rev = "v0.20.0";
122 hash = "sha256-71hAwJ0cGvpwK/ibeeQt82e9uqxcu9sM1rPtPENMPfs=";
123 };
124
125 patches = [
126 # patch to fix python-test
127 ./darwin.patch
128 ];
129
130 nativeBuildInputs = [
131 cmake
132 ninja
133 autoconf # for vendored jemalloc
134 flatbuffers
135 ] ++ lib.optional stdenv.isDarwin fixDarwinDylibNames;
136 buildInputs = [
137 boost
138 brotli
139 flatbuffers
140 gflags
141 glog
142 gtest
143 libbacktrace
144 lz4
145 nlohmann_json # alternative JSON parser to rapidjson
146 protobuf # substrait requires protobuf
147 rapidjson
148 re2
149 snappy
150 thrift
151 utf8proc
152 zlib
153 zstd
154 ] ++ lib.optionals enableFlight [
155 grpc
156 openssl
157 protobuf
158 sqlite
159 ] ++ lib.optionals enableS3 [ aws-sdk-cpp-arrow openssl ]
160 ++ lib.optionals enableGcs [
161 crc32c
162 curl
163 google-cloud-cpp
164 grpc
165 nlohmann_json
166 ];
167
168 preConfigure = ''
169 patchShebangs build-support/
170 substituteInPlace "src/arrow/vendored/datetime/tz.cpp" \
171 --replace 'discover_tz_dir();' '"${tzdata}/share/zoneinfo";'
172 '';
173
174 cmakeFlags = [
175 "-DARROW_BUILD_SHARED=${if enableShared then "ON" else "OFF"}"
176 "-DARROW_BUILD_STATIC=${if enableShared then "OFF" else "ON"}"
177 "-DARROW_BUILD_TESTS=ON"
178 "-DARROW_BUILD_INTEGRATION=ON"
179 "-DARROW_BUILD_UTILITIES=ON"
180 "-DARROW_EXTRA_ERROR_CONTEXT=ON"
181 "-DARROW_VERBOSE_THIRDPARTY_BUILD=ON"
182 "-DARROW_DEPENDENCY_SOURCE=SYSTEM"
183 "-Dxsimd_SOURCE=AUTO"
184 "-DARROW_DEPENDENCY_USE_SHARED=${if enableShared then "ON" else "OFF"}"
185 "-DARROW_COMPUTE=ON"
186 "-DARROW_CSV=ON"
187 "-DARROW_DATASET=ON"
188 "-DARROW_FILESYSTEM=ON"
189 "-DARROW_FLIGHT_SQL=${if enableFlight then "ON" else "OFF"}"
190 "-DARROW_HDFS=ON"
191 "-DARROW_IPC=ON"
192 "-DARROW_JEMALLOC=${if enableJemalloc then "ON" else "OFF"}"
193 "-DARROW_JSON=ON"
194 "-DARROW_USE_GLOG=ON"
195 "-DARROW_WITH_BACKTRACE=ON"
196 "-DARROW_WITH_BROTLI=ON"
197 "-DARROW_WITH_LZ4=ON"
198 "-DARROW_WITH_NLOHMANN_JSON=ON"
199 "-DARROW_WITH_SNAPPY=ON"
200 "-DARROW_WITH_UTF8PROC=ON"
201 "-DARROW_WITH_ZLIB=ON"
202 "-DARROW_WITH_ZSTD=ON"
203 "-DARROW_MIMALLOC=ON"
204 "-DARROW_SUBSTRAIT=ON"
205 "-DARROW_FLIGHT=${if enableFlight then "ON" else "OFF"}"
206 "-DARROW_FLIGHT_TESTING=${if enableFlight then "ON" else "OFF"}"
207 "-DARROW_S3=${if enableS3 then "ON" else "OFF"}"
208 "-DARROW_GCS=${if enableGcs then "ON" else "OFF"}"
209 # Parquet options:
210 "-DARROW_PARQUET=ON"
211 "-DPARQUET_BUILD_EXECUTABLES=ON"
212 "-DPARQUET_REQUIRE_ENCRYPTION=ON"
213 ] ++ lib.optionals (!enableShared) [
214 "-DARROW_TEST_LINKAGE=static"
215 ] ++ lib.optionals stdenv.isDarwin [
216 "-DCMAKE_INSTALL_RPATH=@loader_path/../lib" # needed for tools executables
217 ] ++ lib.optionals (!stdenv.isx86_64) [ "-DARROW_USE_SIMD=OFF" ]
218 ++ lib.optionals enableS3 [ "-DAWSSDK_CORE_HEADER_FILE=${aws-sdk-cpp-arrow}/include/aws/core/Aws.h" ];
219
220 doInstallCheck = true;
221 ARROW_TEST_DATA = lib.optionalString doInstallCheck "${arrow-testing}/data";
222 PARQUET_TEST_DATA = lib.optionalString doInstallCheck "${parquet-testing}/data";
223 GTEST_FILTER =
224 let
225 # Upstream Issue: https://issues.apache.org/jira/browse/ARROW-11398
226 filteredTests = lib.optionals stdenv.hostPlatform.isAarch64 [
227 "TestFilterKernelWithNumeric/3.CompareArrayAndFilterRandomNumeric"
228 "TestFilterKernelWithNumeric/7.CompareArrayAndFilterRandomNumeric"
229 "TestCompareKernel.PrimitiveRandomTests"
230 ] ++ lib.optionals enableS3 [
231 "S3OptionsTest.FromUri"
232 "S3RegionResolutionTest.NonExistentBucket"
233 "S3RegionResolutionTest.PublicBucket"
234 "S3RegionResolutionTest.RestrictedBucket"
235 "TestMinioServer.Connect"
236 "TestS3FS.*"
237 "TestS3FSGeneric.*"
238 ] ++ lib.optionals stdenv.isDarwin [
239 # TODO: revisit at 12.0.0 or when
240 # https://github.com/apache/arrow/commit/295c6644ca6b67c95a662410b2c7faea0920c989
241 # is available, see
242 # https://github.com/apache/arrow/pull/15288#discussion_r1071244661
243 "ExecPlanExecution.StressSourceSinkStopped"
244 ];
245 in
246 lib.optionalString doInstallCheck "-${lib.concatStringsSep ":" filteredTests}";
247
248 __darwinAllowLocalNetworking = true;
249
250 nativeInstallCheckInputs = [ perl which sqlite ]
251 ++ lib.optionals enableS3 [ minio ]
252 ++ lib.optionals enableFlight [ python3 ];
253
254 disabledTests = [
255 # requires networking
256 "arrow-gcsfs-test"
257 "arrow-flight-integration-test"
258 ];
259
260 installCheckPhase = ''
261 runHook preInstallCheck
262
263 ctest -L unittest --exclude-regex '^(${lib.concatStringsSep "|" disabledTests})$'
264
265 runHook postInstallCheck
266 '';
267
268 meta = with lib; {
269 description = "A cross-language development platform for in-memory data";
270 homepage = "https://arrow.apache.org/docs/cpp/";
271 license = licenses.asl20;
272 platforms = platforms.unix;
273 maintainers = with maintainers; [ tobim veprbl cpcloud ];
274 };
275 passthru = {
276 inherit enableFlight enableJemalloc enableS3 enableGcs;
277 };
278}