# Clone of https://github.com/NixOS/nixpkgs.git (to stress-test knotserver)
# Arguments of the arrow-cpp package function: build tooling, library
# dependencies, and the feature switches at the end.
{
  # Build infrastructure and fetchers
  stdenv,
  lib,
  fetchurl,
  fetchFromGitHub,
  fixDarwinDylibNames,

  # Tools and libraries
  autoconf,
  aws-sdk-cpp,
  boost,
  brotli,
  c-ares,
  cmake,
  crc32c,
  curl,
  flatbuffers,
  gflags,
  glog,
  google-cloud-cpp,
  grpc,
  gtest,
  libbacktrace,
  lz4,
  minio,
  ninja,
  nlohmann_json,
  openssl,
  perl,
  protobuf,
  python3,
  rapidjson,
  re2,
  snappy,
  sqlite,
  thrift,
  tzdata,
  utf8proc,
  which,
  zlib,
  zstd,

  # Feature switches

  # Build shared libraries unless the host platform is static-only.
  enableShared ? !stdenv.hostPlatform.isStatic,
  # Build Arrow Flight (and Flight SQL).
  enableFlight ? true,
  # Use the vendored jemalloc allocator; off by default on Darwin.
  enableJemalloc ? !stdenv.isDarwin,
  # boost/process is broken in 1.69 on darwin, but fixed in 1.70 and
  # non-existent in older versions
  # see https://github.com/boostorg/process/issues/55
  enableS3 ?
    !stdenv.isDarwin
    || lib.versionOlder boost.version "1.69"
    || lib.versionAtLeast boost.version "1.70",
  # google-cloud-cpp is not supported on darwin
  enableGcs ? !stdenv.isDarwin,
}:

# Fail evaluation early when a caller forces S3 support on Darwin together
# with Boost 1.69. The check only inspects the Boost version when both S3 and
# Darwin apply, because `||` short-circuits.
assert lib.asserts.assertMsg
  (
    !(enableS3 && stdenv.isDarwin)
    || lib.versionOlder boost.version "1.69"
    || lib.versionAtLeast boost.version "1.70"
  )
  "S3 on Darwin requires Boost != 1.69";

let
  # Fetch a pinned revision of a repository owned by the "apache" GitHub
  # organisation. Both test-data pins below share that owner.
  fetchApacheRepo = { repo, rev, hash }: fetchFromGitHub {
    owner = "apache";
    inherit repo rev hash;
  };

  # Pinned checkout of apache/arrow-testing.
  arrow-testing = fetchApacheRepo {
    repo = "arrow-testing";
    rev = "5bab2f264a23f5af68f69ea93d24ef1e8e77fc88";
    hash = "sha256-Pxx8ohUpXb5u1995IvXmxQMqWiDJ+7LAll/AjQP7ph8=";
  };

  # Pinned checkout of apache/parquet-testing.
  parquet-testing = fetchApacheRepo {
    repo = "parquet-testing";
    rev = "aafd3fc9df431c2625a514fb46626e5614f1d199";
    hash = "sha256-cO5t/mgsbBhbSefx8EMGTyxmgTjhZ8mFujkFQ3p/JS0=";
  };

in
# Derivation for the Apache Arrow C++ libraries, built with CMake and Ninja
# and checked after installation with ctest.
stdenv.mkDerivation rec {
  pname = "arrow-cpp";
  version = "9.0.0";

  # Build from the cpp/ subdirectory of the unpacked release tarball.
  src = fetchurl {
    url = "mirror://apache/arrow/arrow-${version}/apache-arrow-${version}.tar.gz";
    hash = "sha256-qaAz8KNJAomZj0WGgNGVec8HkRcXumWv3my4AHD3qbU=";
  };
  sourceRoot = "apache-arrow-${version}/cpp";

  # versions are all taken from
  # https://github.com/apache/arrow/blob/apache-arrow-${version}/cpp/thirdparty/versions.txt

  # jemalloc: arrow uses a custom prefix to prevent default allocator symbol
  # collisions as well as custom build flags.
  # The attribute name evaluates to null when jemalloc is disabled, in which
  # case Nix leaves the attribute out of the set entirely.
  ${if enableJemalloc then "ARROW_JEMALLOC_URL" else null} = fetchurl {
    url = "https://github.com/jemalloc/jemalloc/releases/download/5.3.0/jemalloc-5.3.0.tar.bz2";
    hash = "sha256-LbgtHnEZ3z5xt2QCGbbf6EeJvAU3mDw7esT3GJrs/qo=";
  };

  # mimalloc: arrow uses custom build flags for mimalloc
  ARROW_MIMALLOC_URL = fetchFromGitHub {
    owner = "microsoft";
    repo = "mimalloc";
    rev = "v2.0.6";
    hash = "sha256-u2ITXABBN/dwU+mCIbL3tN1f4c17aBuSdNTV+Adtohc=";
  };

  # Pinned xsimd sources.
  ARROW_XSIMD_URL = fetchFromGitHub {
    owner = "xtensor-stack";
    repo = "xsimd";
    rev = "8.1.0";
    hash = "sha256-Aqs6XJkGjAjGAp0PprabSM4m+32M/UXpSHppCHdzaZk=";
  };

  # Pinned substrait sources.
  ARROW_SUBSTRAIT_URL = fetchFromGitHub {
    owner = "substrait-io";
    repo = "substrait";
    rev = "v0.6.0";
    hash = "sha256-hxCBomL4Qg9cHLRg9ZiO9k+JVOZXn6f4ikPtK+V9tno=";
  };

  patches = [
    # patch to fix python-test
    ./darwin.patch
  ];

  nativeBuildInputs = [
    cmake
    ninja
    autoconf # for vendored jemalloc
    flatbuffers
  ] ++ lib.optionals stdenv.isDarwin [ fixDarwinDylibNames ];

  # Some packages (protobuf, grpc, openssl, nlohmann_json) are listed by more
  # than one feature group below and so can appear twice in the final list.
  buildInputs = [
    boost
    brotli
    flatbuffers
    gflags
    glog
    gtest
    libbacktrace
    lz4
    nlohmann_json # alternative JSON parser to rapidjson
    protobuf # substrait requires protobuf
    rapidjson
    re2
    snappy
    thrift
    utf8proc
    zlib
    zstd
  ]
  # Python bindings are only built in shared mode (see ARROW_PYTHON below).
  ++ lib.optionals enableShared [
    python3.pkgs.python
    python3.pkgs.numpy
  ]
  ++ lib.optionals enableFlight [
    grpc
    openssl
    protobuf
  ]
  ++ lib.optionals enableS3 [
    aws-sdk-cpp
    openssl
  ]
  ++ lib.optionals enableGcs [
    crc32c
    curl
    google-cloud-cpp
    grpc
    nlohmann_json
  ];

  # Fix interpreter paths in the build helper scripts and hard-wire the
  # time-zone database directory to the tzdata package instead of calling
  # discover_tz_dir() in the vendored date library.
  preConfigure = ''
    patchShebangs build-support/
    substituteInPlace "src/arrow/vendored/datetime/tz.cpp" \
      --replace 'discover_tz_dir();' '"${tzdata}/share/zoneinfo";'
  '';

  cmakeFlags =
    let
      # Render a Nix boolean as the ON/OFF value used by the CMake options.
      onOff = enabled: if enabled then "ON" else "OFF";
    in
    [
      # Exactly one of shared/static is built, selected by enableShared.
      "-DARROW_BUILD_SHARED=${onOff enableShared}"
      "-DARROW_BUILD_STATIC=${onOff (!enableShared)}"
      "-DARROW_BUILD_TESTS=ON"
      "-DARROW_BUILD_INTEGRATION=ON"
      "-DARROW_BUILD_UTILITIES=ON"
      "-DARROW_EXTRA_ERROR_CONTEXT=ON"
      "-DARROW_VERBOSE_THIRDPARTY_BUILD=ON"
      "-DARROW_DEPENDENCY_SOURCE=SYSTEM"
      "-Dxsimd_SOURCE=AUTO"
      "-DARROW_DEPENDENCY_USE_SHARED=${onOff enableShared}"
      "-DARROW_COMPUTE=ON"
      "-DARROW_CSV=ON"
      "-DARROW_DATASET=ON"
      "-DARROW_ENGINE=ON"
      "-DARROW_FILESYSTEM=ON"
      "-DARROW_FLIGHT_SQL=${onOff enableFlight}"
      "-DARROW_HDFS=ON"
      "-DARROW_IPC=ON"
      "-DARROW_JEMALLOC=${onOff enableJemalloc}"
      "-DARROW_JSON=ON"
      "-DARROW_PLASMA=ON"
      # Disable Python for static mode because openblas is currently broken there.
      "-DARROW_PYTHON=${onOff enableShared}"
      "-DARROW_USE_GLOG=ON"
      "-DARROW_WITH_BACKTRACE=ON"
      "-DARROW_WITH_BROTLI=ON"
      "-DARROW_WITH_LZ4=ON"
      "-DARROW_WITH_NLOHMANN_JSON=ON"
      "-DARROW_WITH_SNAPPY=ON"
      "-DARROW_WITH_UTF8PROC=ON"
      "-DARROW_WITH_ZLIB=ON"
      "-DARROW_WITH_ZSTD=ON"
      "-DARROW_MIMALLOC=ON"
      # Parquet options:
      "-DARROW_PARQUET=ON"
      "-DARROW_SUBSTRAIT=ON"
      "-DPARQUET_BUILD_EXECUTABLES=ON"
      "-DARROW_FLIGHT=${onOff enableFlight}"
      "-DARROW_FLIGHT_TESTING=${onOff enableFlight}"
      "-DARROW_S3=${onOff enableS3}"
      "-DARROW_GCS=${onOff enableGcs}"
    ]
    ++ lib.optional (!enableShared) "-DARROW_TEST_LINKAGE=static"
    # needed for tools executables
    ++ lib.optional stdenv.isDarwin "-DCMAKE_INSTALL_RPATH=@loader_path/../lib"
    ++ lib.optionals (!stdenv.isx86_64) [ "-DARROW_USE_SIMD=OFF" ]
    ++ lib.optionals enableS3 [
      "-DAWSSDK_CORE_HEADER_FILE=${aws-sdk-cpp}/include/aws/core/Aws.h"
    ]
    # Use the same C++ standard that grpc reports via its cxxStandard attribute.
    ++ lib.optional enableGcs "-DCMAKE_CXX_STANDARD=${grpc.cxxStandard}";

  # The test suite runs after installation. The test-data variables and the
  # gtest filter below become empty strings if doInstallCheck is turned off.
  doInstallCheck = true;
  ARROW_TEST_DATA = lib.optionalString doInstallCheck "${arrow-testing}/data";
  PARQUET_TEST_DATA = lib.optionalString doInstallCheck "${parquet-testing}/data";

  # Negative gtest filter: a leading "-" followed by a ":"-separated list of
  # test patterns to skip.
  GTEST_FILTER =
    let
      # Upstream Issue: https://issues.apache.org/jira/browse/ARROW-11398
      aarch64Skips = lib.optionals stdenv.hostPlatform.isAarch64 [
        "TestFilterKernelWithNumeric/3.CompareArrayAndFilterRandomNumeric"
        "TestFilterKernelWithNumeric/7.CompareArrayAndFilterRandomNumeric"
        "TestCompareKernel.PrimitiveRandomTests"
      ];
      # NOTE(review): presumably skipped because they need network access or a
      # live S3/minio endpoint that is unavailable in the sandbox; confirm.
      s3Skips = lib.optionals enableS3 [
        "S3OptionsTest.FromUri"
        "S3RegionResolutionTest.NonExistentBucket"
        "S3RegionResolutionTest.PublicBucket"
        "S3RegionResolutionTest.RestrictedBucket"
        "TestMinioServer.Connect"
        "TestS3FS.*"
        "TestS3FSGeneric.*"
      ];
      skippedPatterns = aarch64Skips ++ s3Skips;
    in
    lib.optionalString doInstallCheck "-${builtins.concatStringsSep ":" skippedPatterns}";

  __darwinAllowLocalNetworking = true;
  installCheckInputs = [ perl which sqlite ] ++ lib.optionals enableS3 [ minio ];

  # Run the ctest targets labelled "unittest", excluding by exact name the
  # targets collected below.
  installCheckPhase =
    let
      darwinExclusions = lib.optionals stdenv.isDarwin [
        # Some plasma tests need to be patched to use a shorter AF_UNIX socket
        # path on Darwin. See https://github.com/NixOS/nix/pull/1085
        "plasma-external-store-tests"
        "plasma-client-tests"
      ];
      # arrow-gcsfs-test is excluded on every platform.
      excludedTargets = darwinExclusions ++ [ "arrow-gcsfs-test" ];
      excludeAlternatives = builtins.concatStringsSep "|" excludedTargets;
    in
    ''
      runHook preInstallCheck

      ctest -L unittest \
        --exclude-regex '^(${excludeAlternatives})$'

      runHook postInstallCheck
    '';

  meta = {
    description = "A cross-language development platform for in-memory data";
    homepage = "https://arrow.apache.org/docs/cpp/";
    license = lib.licenses.asl20;
    platforms = lib.platforms.unix;
    maintainers = with lib.maintainers; [ tobim veprbl cpcloud ];
  };

  # Expose the chosen feature switches on the resulting package.
  passthru = {
    inherit
      enableFlight
      enableJemalloc
      enableS3
      enableGcs
      ;
  };
}
262}