# Package expression for the Apache Arrow C++ libraries.
#
# Feature switches (all overridable by the caller):
#   enableShared   - build shared libraries when true, static ones when false;
#                    defaults to shared unless the host platform is static
#   enableFlight   - toggles the Arrow Flight / Flight SQL cmake options
#   enableJemalloc - toggles ARROW_JEMALLOC; defaults to off on Darwin
#   enableS3       - toggles ARROW_S3
#   enableGcs      - toggles ARROW_GCS; defaults to off on Darwin
{
  stdenv,
  lib,
  fetchurl,
  fetchFromGitHub,
  fixDarwinDylibNames,
  autoconf,
  aws-sdk-cpp,
  boost,
  brotli,
  c-ares,
  cmake,
  crc32c,
  curl,
  flatbuffers,
  gflags,
  glog,
  google-cloud-cpp,
  grpc,
  gtest,
  libbacktrace,
  lz4,
  minio,
  ninja,
  nlohmann_json,
  openssl,
  perl,
  protobuf,
  python3,
  rapidjson,
  re2,
  snappy,
  sqlite,
  thrift,
  tzdata,
  utf8proc,
  which,
  zlib,
  zstd,
  enableShared ? !stdenv.hostPlatform.isStatic,
  enableFlight ? true,
  enableJemalloc ? !stdenv.isDarwin,
  enableS3 ? true,
  enableGcs ? !stdenv.isDarwin,
}:
45
# Refuse to evaluate when S3 support is requested on Darwin together with a
# Boost 1.69.x release (i.e. version >= 1.69 and < 1.70).  The Boost version
# is only inspected when both S3 and Darwin apply.
assert lib.asserts.assertMsg
  (
    !(enableS3 && stdenv.isDarwin)
    || !(lib.versionAtLeast boost.version "1.69" && lib.versionOlder boost.version "1.70")
  )
  "S3 on Darwin requires Boost != 1.69";
49
50let
# Fetches one of the github.com/apache test-data repositories at a pinned
# revision; the store path is named after the repository.
fetchApacheTestData =
  { repo, rev, hash }:
  fetchFromGitHub {
    name = repo;
    owner = "apache";
    inherit repo rev hash;
  };

# Fixture data consumed by the Arrow test suite (see ARROW_TEST_DATA).
arrow-testing = fetchApacheTestData {
  repo = "arrow-testing";
  rev = "47f7b56b25683202c1fd957668e13f2abafc0f12";
  hash = "sha256-ZDznR+yi0hm5O1s9as8zq5nh1QxJ8kXCRwbNQlzXpnI=";
};

# Fixture data consumed by the Parquet test suite (see PARQUET_TEST_DATA).
parquet-testing = fetchApacheTestData {
  repo = "parquet-testing";
  rev = "b2e7cc755159196e3a068c8594f7acbaecfdaaac";
  hash = "sha256-IFvGTOkaRSNgZOj8DziRj88yH5JRF+wgSDZ5N0GNvjk=";
};
66
# AWS SDK overridden to build only the listed API subset; used for the S3
# filesystem when enableS3 is set.
aws-sdk-cpp-arrow =
  let
    requiredApis = [
      "cognito-identity"
      "config"
      "identity-management"
      "s3"
      "sts"
      "transfer"
    ];
  in
  aws-sdk-cpp.override { apis = requiredApis; };
77
78in
79stdenv.mkDerivation rec {
pname = "arrow-cpp";
version = "13.0.0";

# Release tarball from the Apache mirror network.
src = fetchurl {
  hash = "sha256-Nd/aGRJip1a+k07viv7o0JdiytJQIdqmJuskniUayeY=";
  url = "mirror://apache/arrow/arrow-${version}/apache-arrow-${version}.tar.gz";
};

# Build from the cpp/ subdirectory of the unpacked tarball.
sourceRoot = "apache-arrow-${version}/cpp";
89
# Pre-fetched sources for the dependencies Arrow builds itself.
# versions are all taken from
# https://github.com/apache/arrow/blob/apache-arrow-${version}/cpp/thirdparty/versions.txt

# jemalloc: arrow uses a custom prefix to prevent default allocator symbol
# collisions as well as custom build flags.
# The attribute name evaluates to null, and the attribute is therefore
# omitted, when jemalloc is disabled.
${if !enableJemalloc then null else "ARROW_JEMALLOC_URL"} = fetchurl {
  hash = "sha256-LbgtHnEZ3z5xt2QCGbbf6EeJvAU3mDw7esT3GJrs/qo=";
  url = "https://github.com/jemalloc/jemalloc/releases/download/5.3.0/jemalloc-5.3.0.tar.bz2";
};

# mimalloc: arrow uses custom build flags for mimalloc
ARROW_MIMALLOC_URL = fetchFromGitHub {
  repo = "mimalloc";
  owner = "microsoft";
  rev = "v2.0.6";
  hash = "sha256-u2ITXABBN/dwU+mCIbL3tN1f4c17aBuSdNTV+Adtohc=";
};

# xsimd sources
ARROW_XSIMD_URL = fetchFromGitHub {
  repo = "xsimd";
  owner = "xtensor-stack";
  rev = "9.0.1";
  hash = "sha256-onALN6agtrHWigtFlCeefD9CiRZI4Y690XTzy2UDnrk=";
};

# substrait sources
ARROW_SUBSTRAIT_URL = fetchFromGitHub {
  repo = "substrait";
  owner = "substrait-io";
  rev = "v0.20.0";
  hash = "sha256-71hAwJ0cGvpwK/ibeeQt82e9uqxcu9sM1rPtPENMPfs=";
};
121
# Protobuf switched to lower case project name.
patches = [ ./cmake-find-protobuf.patch ];

# Build-time tools; fixDarwinDylibNames is only added on Darwin.
nativeBuildInputs =
  [
    cmake
    ninja
    autoconf # for vendored jemalloc
    flatbuffers
  ]
  ++ lib.optionals stdenv.isDarwin [ fixDarwinDylibNames ];
# Libraries linked into Arrow: an unconditional base set plus one group per
# optional feature.  A package may appear in more than one group; the groups
# are concatenated as-is.
buildInputs =
  let
    baseLibs = [
      boost
      brotli
      flatbuffers
      gflags
      glog
      gtest
      libbacktrace
      lz4
      nlohmann_json # alternative JSON parser to rapidjson
      protobuf # substrait requires protobuf
      rapidjson
      re2
      snappy
      thrift
      utf8proc
      zlib
      zstd
    ];
    flightLibs = [ grpc openssl protobuf sqlite ];
    s3Libs = [ aws-sdk-cpp-arrow openssl ];
    gcsLibs = [ crc32c curl google-cloud-cpp grpc nlohmann_json ];
  in
  baseLibs
  ++ lib.optionals enableFlight flightLibs
  ++ lib.optionals enableS3 s3Libs
  ++ lib.optionals enableGcs gcsLibs;
164
# Before configuring: fix interpreter paths of the scripts in build-support/
# and replace the discover_tz_dir() call in the vendored date library with
# the fixed zoneinfo directory from tzdata.
preConfigure =
  let
    zoneinfoDir = "${tzdata}/share/zoneinfo";
  in
  ''
    patchShebangs build-support/
    substituteInPlace "src/arrow/vendored/datetime/tz.cpp" \
    --replace 'discover_tz_dir();' '"${zoneinfoDir}";'
  '';
170
# CMake configuration.  Feature switches from the function arguments are
# rendered as ON/OFF through the local `onOff` helper.
cmakeFlags =
  let
    onOff = enabled: if enabled then "ON" else "OFF";
  in
  [
    "-DCMAKE_FIND_PACKAGE_PREFER_CONFIG=ON"
    # Exactly one of shared/static is built, depending on enableShared.
    "-DARROW_BUILD_SHARED=${onOff enableShared}"
    "-DARROW_BUILD_STATIC=${onOff (!enableShared)}"
    "-DARROW_BUILD_TESTS=ON"
    "-DARROW_BUILD_INTEGRATION=ON"
    "-DARROW_BUILD_UTILITIES=ON"
    "-DARROW_EXTRA_ERROR_CONTEXT=ON"
    "-DARROW_VERBOSE_THIRDPARTY_BUILD=ON"
    "-DARROW_DEPENDENCY_SOURCE=SYSTEM"
    "-Dxsimd_SOURCE=AUTO"
    "-DARROW_DEPENDENCY_USE_SHARED=${onOff enableShared}"
    "-DARROW_COMPUTE=ON"
    "-DARROW_CSV=ON"
    "-DARROW_DATASET=ON"
    "-DARROW_FILESYSTEM=ON"
    "-DARROW_FLIGHT_SQL=${onOff enableFlight}"
    "-DARROW_HDFS=ON"
    "-DARROW_IPC=ON"
    "-DARROW_JEMALLOC=${onOff enableJemalloc}"
    "-DARROW_JSON=ON"
    "-DARROW_USE_GLOG=ON"
    "-DARROW_WITH_BACKTRACE=ON"
    "-DARROW_WITH_BROTLI=ON"
    "-DARROW_WITH_LZ4=ON"
    "-DARROW_WITH_NLOHMANN_JSON=ON"
    "-DARROW_WITH_SNAPPY=ON"
    "-DARROW_WITH_UTF8PROC=ON"
    "-DARROW_WITH_ZLIB=ON"
    "-DARROW_WITH_ZSTD=ON"
    "-DARROW_MIMALLOC=ON"
    "-DARROW_SUBSTRAIT=ON"
    "-DARROW_FLIGHT=${onOff enableFlight}"
    "-DARROW_FLIGHT_TESTING=${onOff enableFlight}"
    "-DARROW_S3=${onOff enableS3}"
    "-DARROW_GCS=${onOff enableGcs}"
    # Parquet options:
    "-DARROW_PARQUET=ON"
    "-DPARQUET_BUILD_EXECUTABLES=ON"
    "-DPARQUET_REQUIRE_ENCRYPTION=ON"
  ]
  # Static-only builds link the tests statically as well.
  ++ lib.optional (!enableShared) "-DARROW_TEST_LINKAGE=static"
  # needed for tools executables
  ++ lib.optional stdenv.isDarwin "-DCMAKE_INSTALL_RPATH=@loader_path/../lib"
  # NOTE(review): confirm ARROW_USE_SIMD is still honoured by this Arrow
  # release; it is passed on every non-x86_64 platform.
  ++ lib.optional (!stdenv.isx86_64) "-DARROW_USE_SIMD=OFF"
  ++ lib.optional enableS3 "-DAWSSDK_CORE_HEADER_FILE=${aws-sdk-cpp-arrow}/include/aws/core/Aws.h";
217
doInstallCheck = true;

# Locations of the test fixtures; left empty when the install check is off.
ARROW_TEST_DATA = if doInstallCheck then "${arrow-testing}/data" else "";
PARQUET_TEST_DATA = if doInstallCheck then "${parquet-testing}/data" else "";

# GoogleTest filter: a leading "-" followed by the ":"-separated test
# patterns to skip.  Empty when the install check is off.
GTEST_FILTER =
  let
    # Upstream Issue: https://issues.apache.org/jira/browse/ARROW-11398
    aarch64Skips = lib.optionals stdenv.hostPlatform.isAarch64 [
      "TestFilterKernelWithNumeric/3.CompareArrayAndFilterRandomNumeric"
      "TestFilterKernelWithNumeric/7.CompareArrayAndFilterRandomNumeric"
      "TestCompareKernel.PrimitiveRandomTests"
    ];

    # S3 test cases skipped whenever S3 support is built.
    s3Skips = lib.optionals enableS3 [
      "S3OptionsTest.FromUri"
      "S3RegionResolutionTest.NonExistentBucket"
      "S3RegionResolutionTest.PublicBucket"
      "S3RegionResolutionTest.RestrictedBucket"
      "TestMinioServer.Connect"
      "TestS3FS.*"
      "TestS3FSGeneric.*"
    ];

    # TODO: this was slated for review at 12.0.0 (this package is already
    # past that); re-check whether
    # https://github.com/apache/arrow/commit/295c6644ca6b67c95a662410b2c7faea0920c989
    # is available, see
    # https://github.com/apache/arrow/pull/15288#discussion_r1071244661
    darwinSkips = lib.optionals stdenv.isDarwin [
      "ExecPlanExecution.StressSourceSinkStopped"
    ];

    filteredTests = aarch64Skips ++ s3Skips ++ darwinSkips;
  in
  lib.optionalString doInstallCheck ("-" + lib.concatStringsSep ":" filteredTests);
245
# Permit local networking inside the Darwin sandbox.
__darwinAllowLocalNetworking = true;

# Tools available to the install check; minio is only added with S3 support
# and python3 only with Flight support.
nativeInstallCheckInputs =
  let
    always = [ perl which sqlite ];
  in
  always
  ++ lib.optional enableS3 minio
  ++ lib.optional enableFlight python3;
251
# ctest test names excluded from the install check.
disabledTests = [
  # requires networking
  "arrow-gcsfs-test"
  "arrow-flight-integration-test"
];

# Run every ctest case labelled "unittest" except those whose full name
# matches one of `disabledTests` (anchored alternation).
installCheckPhase =
  let
    excludePattern = "^(${lib.concatStringsSep "|" disabledTests})$";
  in
  ''
    runHook preInstallCheck

    ctest -L unittest --exclude-regex '${excludePattern}'

    runHook postInstallCheck
  '';
265
# Package metadata.
meta = {
  description = "A cross-language development platform for in-memory data";
  homepage = "https://arrow.apache.org/docs/cpp/";
  license = lib.licenses.asl20;
  platforms = lib.platforms.unix;
  maintainers = [
    lib.maintainers.tobim
    lib.maintainers.veprbl
    lib.maintainers.cpcloud
  ];
};

# Expose the chosen feature switches on the resulting package.
passthru = {
  inherit
    enableFlight
    enableJemalloc
    enableS3
    enableGcs
    ;
};
276}