# at 23.11-beta 7.6 kB view raw
# Package expression for the Apache Arrow C++ libraries ("arrow-cpp").
#
# Feature toggles (function arguments):
#   enableShared   - build shared libraries; when false, static libraries are
#                    built and the tests are linked statically.
#   enableFlight   - build Arrow Flight / Flight SQL (adds grpc, openssl,
#                    protobuf and sqlite to buildInputs).
#   enableJemalloc - build with the jemalloc source tarball fetched below.
#   enableS3       - build the S3 filesystem against a trimmed aws-sdk-cpp.
#   enableGcs      - build the GCS filesystem against google-cloud-cpp.
#
# The derivation uses the `finalAttrs` fixed-point form of mkDerivation rather
# than `rec`, so attributes derived from `version`, `doInstallCheck` and
# `disabledTests` follow changes made through `overrideAttrs`.
{ stdenv
, lib
, fetchurl
, fetchFromGitHub
, fixDarwinDylibNames
, autoconf
, aws-sdk-cpp
, boost
, brotli
, c-ares
, cmake
, crc32c
, curl
, flatbuffers
, gflags
, glog
, google-cloud-cpp
, grpc
, gtest
, libbacktrace
, lz4
, minio
, ninja
, nlohmann_json
, openssl
, perl
, protobuf
, python3
, rapidjson
, re2
, snappy
, sqlite
, thrift
, tzdata
, utf8proc
, which
, zlib
, zstd
, enableShared ? !stdenv.hostPlatform.isStatic
, enableFlight ? true
, enableJemalloc ? !stdenv.isDarwin
, enableS3 ? true
, enableGcs ? !stdenv.isDarwin
}:

assert lib.asserts.assertMsg
  ((enableS3 && stdenv.isDarwin) -> (lib.versionOlder boost.version "1.69" || lib.versionAtLeast boost.version "1.70"))
  "S3 on Darwin requires Boost != 1.69";

let
  # Test fixtures exported to the test suite via ARROW_TEST_DATA below.
  arrow-testing = fetchFromGitHub {
    name = "arrow-testing";
    owner = "apache";
    repo = "arrow-testing";
    rev = "47f7b56b25683202c1fd957668e13f2abafc0f12";
    hash = "sha256-ZDznR+yi0hm5O1s9as8zq5nh1QxJ8kXCRwbNQlzXpnI=";
  };

  # Test fixtures exported to the test suite via PARQUET_TEST_DATA below.
  parquet-testing = fetchFromGitHub {
    name = "parquet-testing";
    owner = "apache";
    repo = "parquet-testing";
    rev = "b2e7cc755159196e3a068c8594f7acbaecfdaaac";
    hash = "sha256-IFvGTOkaRSNgZOj8DziRj88yH5JRF+wgSDZ5N0GNvjk=";
  };

  # aws-sdk-cpp restricted to the listed API modules.
  aws-sdk-cpp-arrow = aws-sdk-cpp.override {
    apis = [
      "cognito-identity"
      "config"
      "identity-management"
      "s3"
      "sts"
      "transfer"
    ];
  };

in
stdenv.mkDerivation (finalAttrs: {
  pname = "arrow-cpp";
  version = "14.0.1";

  src = fetchurl {
    url = "mirror://apache/arrow/arrow-${finalAttrs.version}/apache-arrow-${finalAttrs.version}.tar.gz";
    hash = "sha256-XHDq+xAR+dEkuvsyiv5U9izFuSgLcIDh49Zo94wOQH4=";
  };

  sourceRoot = "apache-arrow-${finalAttrs.version}/cpp";

  # versions are all taken from
  # https://github.com/apache/arrow/blob/apache-arrow-${version}/cpp/thirdparty/versions.txt

  # jemalloc: arrow uses a custom prefix to prevent default allocator symbol
  # collisions as well as custom build flags
  # (a `null` attribute name makes Nix omit the attribute entirely)
  ${if enableJemalloc then "ARROW_JEMALLOC_URL" else null} = fetchurl {
    url = "https://github.com/jemalloc/jemalloc/releases/download/5.3.0/jemalloc-5.3.0.tar.bz2";
    hash = "sha256-LbgtHnEZ3z5xt2QCGbbf6EeJvAU3mDw7esT3GJrs/qo=";
  };

  # mimalloc: arrow uses custom build flags for mimalloc
  ARROW_MIMALLOC_URL = fetchFromGitHub {
    owner = "microsoft";
    repo = "mimalloc";
    rev = "v2.0.6";
    hash = "sha256-u2ITXABBN/dwU+mCIbL3tN1f4c17aBuSdNTV+Adtohc=";
  };

  ARROW_XSIMD_URL = fetchFromGitHub {
    owner = "xtensor-stack";
    repo = "xsimd";
    rev = "9.0.1";
    hash = "sha256-onALN6agtrHWigtFlCeefD9CiRZI4Y690XTzy2UDnrk=";
  };

  ARROW_SUBSTRAIT_URL = fetchFromGitHub {
    owner = "substrait-io";
    repo = "substrait";
    rev = "v0.27.0";
    hash = "sha256-wptEAXembah04pzqAz6UHeUxp+jMf6Lh/IdyuIhy/a8=";
  };

  nativeBuildInputs = [
    cmake
    ninja
    autoconf # for vendored jemalloc
    flatbuffers
  ] ++ lib.optional stdenv.isDarwin fixDarwinDylibNames;
  buildInputs = [
    boost
    brotli
    flatbuffers
    gflags
    glog
    gtest
    libbacktrace
    lz4
    nlohmann_json # alternative JSON parser to rapidjson
    protobuf # substrait requires protobuf
    rapidjson
    re2
    snappy
    thrift
    utf8proc
    zlib
    zstd
  ] ++ lib.optionals enableFlight [
    grpc
    openssl
    protobuf
    sqlite
  ] ++ lib.optionals enableS3 [ aws-sdk-cpp-arrow openssl ]
  ++ lib.optionals enableGcs [
    crc32c
    curl
    google-cloud-cpp
    grpc
    nlohmann_json
  ];

  # Replace the vendored date library's runtime tz-dir discovery call with the
  # fixed tzdata store path.
  preConfigure = ''
    patchShebangs build-support/
    substituteInPlace "src/arrow/vendored/datetime/tz.cpp" \
      --replace 'discover_tz_dir();' '"${tzdata}/share/zoneinfo";'
  '';

  cmakeFlags = [
    "-DCMAKE_FIND_PACKAGE_PREFER_CONFIG=ON"
    "-DARROW_BUILD_SHARED=${if enableShared then "ON" else "OFF"}"
    "-DARROW_BUILD_STATIC=${if enableShared then "OFF" else "ON"}"
    "-DARROW_BUILD_TESTS=ON"
    "-DARROW_BUILD_INTEGRATION=ON"
    "-DARROW_BUILD_UTILITIES=ON"
    "-DARROW_EXTRA_ERROR_CONTEXT=ON"
    "-DARROW_VERBOSE_THIRDPARTY_BUILD=ON"
    "-DARROW_DEPENDENCY_SOURCE=SYSTEM"
    "-Dxsimd_SOURCE=AUTO"
    "-DARROW_DEPENDENCY_USE_SHARED=${if enableShared then "ON" else "OFF"}"
    "-DARROW_COMPUTE=ON"
    "-DARROW_CSV=ON"
    "-DARROW_DATASET=ON"
    "-DARROW_FILESYSTEM=ON"
    "-DARROW_FLIGHT_SQL=${if enableFlight then "ON" else "OFF"}"
    "-DARROW_HDFS=ON"
    "-DARROW_IPC=ON"
    "-DARROW_JEMALLOC=${if enableJemalloc then "ON" else "OFF"}"
    "-DARROW_JSON=ON"
    "-DARROW_USE_GLOG=ON"
    "-DARROW_WITH_BACKTRACE=ON"
    "-DARROW_WITH_BROTLI=ON"
    "-DARROW_WITH_LZ4=ON"
    "-DARROW_WITH_NLOHMANN_JSON=ON"
    "-DARROW_WITH_SNAPPY=ON"
    "-DARROW_WITH_UTF8PROC=ON"
    "-DARROW_WITH_ZLIB=ON"
    "-DARROW_WITH_ZSTD=ON"
    "-DARROW_MIMALLOC=ON"
    "-DARROW_SUBSTRAIT=ON"
    "-DARROW_FLIGHT=${if enableFlight then "ON" else "OFF"}"
    "-DARROW_FLIGHT_TESTING=${if enableFlight then "ON" else "OFF"}"
    "-DARROW_S3=${if enableS3 then "ON" else "OFF"}"
    "-DARROW_GCS=${if enableGcs then "ON" else "OFF"}"
    # Parquet options:
    "-DARROW_PARQUET=ON"
    "-DPARQUET_BUILD_EXECUTABLES=ON"
    "-DPARQUET_REQUIRE_ENCRYPTION=ON"
  ] ++ lib.optionals (!enableShared) [
    "-DARROW_TEST_LINKAGE=static"
  ] ++ lib.optionals stdenv.isDarwin [
    "-DCMAKE_INSTALL_RPATH=@loader_path/../lib" # needed for tools executables
  ] ++ lib.optionals (!stdenv.isx86_64) [ "-DARROW_USE_SIMD=OFF" ]
  ++ lib.optionals enableS3 [ "-DAWSSDK_CORE_HEADER_FILE=${aws-sdk-cpp-arrow}/include/aws/core/Aws.h" ];

  doInstallCheck = true;
  # Read through finalAttrs so that overriding doInstallCheck also empties
  # these variables (with `rec` they stayed pinned to the value written here).
  ARROW_TEST_DATA = lib.optionalString finalAttrs.doInstallCheck "${arrow-testing}/data";
  PARQUET_TEST_DATA = lib.optionalString finalAttrs.doInstallCheck "${parquet-testing}/data";
  # gtest filter string: a leading "-" followed by ":"-separated test patterns.
  GTEST_FILTER =
    let
      # Upstream Issue: https://issues.apache.org/jira/browse/ARROW-11398
      filteredTests = lib.optionals stdenv.hostPlatform.isAarch64 [
        "TestFilterKernelWithNumeric/3.CompareArrayAndFilterRandomNumeric"
        "TestFilterKernelWithNumeric/7.CompareArrayAndFilterRandomNumeric"
        "TestCompareKernel.PrimitiveRandomTests"
      ] ++ lib.optionals enableS3 [
        "S3OptionsTest.FromUri"
        "S3RegionResolutionTest.NonExistentBucket"
        "S3RegionResolutionTest.PublicBucket"
        "S3RegionResolutionTest.RestrictedBucket"
        "TestMinioServer.Connect"
        "TestS3FS.*"
        "TestS3FSGeneric.*"
      ] ++ lib.optionals stdenv.isDarwin [
        # TODO: overdue -- this was to be revisited at 12.0.0 (the package is
        # now at 14.0.1) or when
        # https://github.com/apache/arrow/commit/295c6644ca6b67c95a662410b2c7faea0920c989
        # is available, see
        # https://github.com/apache/arrow/pull/15288#discussion_r1071244661
        "ExecPlanExecution.StressSourceSinkStopped"
      ];
    in
    lib.optionalString finalAttrs.doInstallCheck "-${lib.concatStringsSep ":" filteredTests}";

  __darwinAllowLocalNetworking = true;

  nativeInstallCheckInputs = [ perl which sqlite ]
    ++ lib.optionals enableS3 [ minio ]
    ++ lib.optionals enableFlight [ python3 ];

  # ctest test names excluded from installCheckPhase; extend via overrideAttrs.
  disabledTests = [
    # requires networking
    "arrow-gcsfs-test"
    "arrow-flight-integration-test"
  ];

  # Run the ctest tests labelled "unittest", skipping every name in
  # disabledTests (joined into one anchored alternation regex).
  installCheckPhase = ''
    runHook preInstallCheck

    ctest -L unittest --exclude-regex '^(${lib.concatStringsSep "|" finalAttrs.disabledTests})$'

    runHook postInstallCheck
  '';

  meta = with lib; {
    description = "A cross-language development platform for in-memory data";
    homepage = "https://arrow.apache.org/docs/cpp/";
    license = licenses.asl20;
    platforms = platforms.unix;
    maintainers = with maintainers; [ tobim veprbl cpcloud ];
  };
  passthru = {
    inherit enableFlight enableJemalloc enableS3 enableGcs;
  };
})