# at master · 338 lines · 9.0 kB (repository viewer header; not part of the Nix expression)
# Package expression for the Apache Arrow C++ libraries (pname "arrow-cpp"),
# built from the `cpp/` subdirectory of the apache/arrow repository.
#
# Optional components are selected through the `enable*` arguments; each one
# adds the matching build inputs and sets the matching CMake switch below.
{
  stdenv,
  lib,
  fetchurl,
  fetchFromGitHub,
  fixDarwinDylibNames,
  apache-orc,
  autoconf,
  aws-sdk-cpp,
  # aws-sdk-cpp overridden to the listed API subset; only used when enableS3 is set.
  aws-sdk-cpp-arrow ? aws-sdk-cpp.override {
    apis = [
      "cognito-identity"
      "config"
      "identity-management"
      "s3"
      "sts"
      "transfer"
    ];
  },
  boost,
  brotli,
  bzip2,
  cmake,
  crc32c,
  curl,
  flatbuffers,
  gflags,
  glog,
  google-cloud-cpp,
  grpc,
  gtest,
  libbacktrace,
  lz4,
  minio,
  ninja,
  nlohmann_json,
  openssl,
  perl,
  pkg-config,
  protobuf_31,
  python3,
  rapidjson,
  re2,
  snappy,
  sqlite,
  thrift,
  tzdata,
  utf8proc,
  which,
  zlib,
  zstd,
  testers,
  # Build shared libraries unless the host platform is static; also decides
  # whether the test suite is built (see ARROW_BUILD_TESTS below).
  enableShared ? !stdenv.hostPlatform.isStatic,
  # Flight (and Flight SQL) is only enabled by default for native builds.
  enableFlight ? stdenv.buildPlatform == stdenv.hostPlatform,
  # Disable also on RiscV
  # configure: error: cannot determine number of significant virtual address bits
  enableJemalloc ?
    !stdenv.hostPlatform.isDarwin && !stdenv.hostPlatform.isAarch64 && !stdenv.hostPlatform.isRiscV64,
  enableS3 ? true,
  # google-cloud-cpp fails to build on RiscV
  enableGcs ? !stdenv.hostPlatform.isDarwin && !stdenv.hostPlatform.isRiscV64,
}:

let
  # Test fixtures; exposed to the test suite through ARROW_TEST_DATA below.
  arrow-testing = fetchFromGitHub {
    name = "arrow-testing";
    owner = "apache";
    repo = "arrow-testing";
    rev = "d2a13712303498963395318a4eb42872e66aead7";
    hash = "sha256-c8FL37kG0uo7o0Zp71WjCl7FD5BnVgqUCCXXX9gI0lg=";
  };

  # Test fixtures; exposed to the test suite through PARQUET_TEST_DATA below.
  parquet-testing = fetchFromGitHub {
    name = "parquet-testing";
    owner = "apache";
    repo = "parquet-testing";
    rev = "18d17540097fca7c40be3d42c167e6bfad90763c";
    hash = "sha256-gKEQc2RKpVp39RmuZbIeIXAwiAXDHGnLXF6VQuJtnRA=";
  };

  version = "20.0.0";
in
stdenv.mkDerivation (finalAttrs: {
  pname = "arrow-cpp";
  inherit version;

  src = fetchFromGitHub {
    owner = "apache";
    repo = "arrow";
    rev = "apache-arrow-${version}";
    hash = "sha256-JFPdKraCU+xRkBTAHyY4QGnBVlOjQ1P5+gq9uxyqJtk=";
  };

  # Only the C++ implementation is built from the repository checkout.
  sourceRoot = "${finalAttrs.src.name}/cpp";

  # versions are all taken from
  # https://github.com/apache/arrow/blob/apache-arrow-${version}/cpp/thirdparty/versions.txt

  # jemalloc: arrow uses a custom prefix to prevent default allocator symbol
  # collisions as well as custom build flags
  # (a `null` attribute name drops the attribute entirely when jemalloc is disabled)
  ${if enableJemalloc then "ARROW_JEMALLOC_URL" else null} = fetchurl {
    url = "https://github.com/jemalloc/jemalloc/releases/download/5.3.0/jemalloc-5.3.0.tar.bz2";
    hash = "sha256-LbgtHnEZ3z5xt2QCGbbf6EeJvAU3mDw7esT3GJrs/qo=";
  };

  # mimalloc: arrow uses custom build flags for mimalloc
  ARROW_MIMALLOC_URL = fetchFromGitHub {
    owner = "microsoft";
    repo = "mimalloc";
    rev = "v2.0.6";
    hash = "sha256-u2ITXABBN/dwU+mCIbL3tN1f4c17aBuSdNTV+Adtohc=";
  };

  ARROW_XSIMD_URL = fetchFromGitHub {
    owner = "xtensor-stack";
    repo = "xsimd";
    rev = "13.0.0";
    hash = "sha256-qElJYW5QDj3s59L3NgZj5zkhnUMzIP2mBa1sPks3/CE=";
  };

  ARROW_SUBSTRAIT_URL = fetchFromGitHub {
    owner = "substrait-io";
    repo = "substrait";
    rev = "v0.44.0";
    hash = "sha256-V739IFTGPtbGPlxcOi8sAaYSDhNUEpITvN9IqdPReug=";
  };

  nativeBuildInputs = [
    cmake
    pkg-config
    ninja
    autoconf # for vendored jemalloc
    flatbuffers
  ]
  ++ lib.optional stdenv.hostPlatform.isDarwin fixDarwinDylibNames;

  buildInputs = [
    apache-orc
    boost
    brotli
    bzip2
    flatbuffers
    gflags
    glog
    gtest
    libbacktrace
    lz4
    nlohmann_json # alternative JSON parser to rapidjson
    protobuf_31 # substrait requires protobuf
    rapidjson
    re2
    snappy
    thrift
    utf8proc
    zlib
    zstd
  ]
  # protobuf_31 is already an unconditional input above, so it is not repeated here.
  ++ lib.optionals enableFlight [
    grpc
    openssl
    sqlite
  ]
  ++ lib.optionals enableS3 [
    aws-sdk-cpp-arrow
    openssl
  ]
  # nlohmann_json is already an unconditional input above, so it is not repeated here.
  ++ lib.optionals enableGcs [
    crc32c
    curl
    google-cloud-cpp
    grpc
  ];

  # apache-orc looks for things in caps
  env = {
    LZ4_ROOT = lz4;
    ZSTD_ROOT = zstd.dev;
  };

  # fails tests on glibc with this enabled
  hardeningDisable = [ "glibcxxassertions" ];

  # Fix script interpreters and hard-wire the time zone database location to
  # the tzdata store path in place of the `discover_tz_dir()` call.
  preConfigure = ''
    patchShebangs build-support/
    substituteInPlace "src/arrow/vendored/datetime/tz.cpp" \
      --replace-fail 'discover_tz_dir();' '"${tzdata}/share/zoneinfo";'
  '';

  cmakeFlags = [
    (lib.cmakeBool "CMAKE_FIND_PACKAGE_PREFER_CONFIG" true)
    (lib.cmakeBool "ARROW_BUILD_SHARED" enableShared)
    (lib.cmakeBool "ARROW_BUILD_STATIC" (!enableShared))
    # Tests are only built for shared builds.
    (lib.cmakeBool "ARROW_BUILD_TESTS" enableShared)
    (lib.cmakeBool "ARROW_BUILD_INTEGRATION" true)
    (lib.cmakeBool "ARROW_BUILD_UTILITIES" true)
    (lib.cmakeBool "ARROW_EXTRA_ERROR_CONTEXT" true)
    (lib.cmakeBool "ARROW_VERBOSE_THIRDPARTY_BUILD" true)
    (lib.cmakeFeature "ARROW_DEPENDENCY_SOURCE" "SYSTEM")
    (lib.cmakeFeature "xsimd_SOURCE" "AUTO")
    (lib.cmakeBool "ARROW_DEPENDENCY_USE_SHARED" enableShared)
    (lib.cmakeBool "ARROW_COMPUTE" true)
    (lib.cmakeBool "ARROW_CSV" true)
    (lib.cmakeBool "ARROW_DATASET" true)
    (lib.cmakeBool "ARROW_FILESYSTEM" true)
    (lib.cmakeBool "ARROW_FLIGHT_SQL" enableFlight)
    (lib.cmakeBool "ARROW_HDFS" true)
    (lib.cmakeBool "ARROW_IPC" true)
    (lib.cmakeBool "ARROW_JEMALLOC" enableJemalloc)
    (lib.cmakeBool "ARROW_JSON" true)
    (lib.cmakeBool "ARROW_USE_GLOG" true)
    (lib.cmakeBool "ARROW_WITH_BACKTRACE" true)
    (lib.cmakeBool "ARROW_WITH_BROTLI" true)
    (lib.cmakeBool "ARROW_WITH_BZ2" true)
    (lib.cmakeBool "ARROW_WITH_LZ4" true)
    (lib.cmakeBool "ARROW_WITH_NLOHMANN_JSON" true)
    (lib.cmakeBool "ARROW_WITH_SNAPPY" true)
    (lib.cmakeBool "ARROW_WITH_UTF8PROC" true)
    (lib.cmakeBool "ARROW_WITH_ZLIB" true)
    (lib.cmakeBool "ARROW_WITH_ZSTD" true)
    (lib.cmakeBool "ARROW_MIMALLOC" true)
    (lib.cmakeBool "ARROW_SUBSTRAIT" true)
    (lib.cmakeBool "ARROW_FLIGHT" enableFlight)
    (lib.cmakeBool "ARROW_FLIGHT_TESTING" enableFlight)
    (lib.cmakeBool "ARROW_S3" enableS3)
    (lib.cmakeBool "ARROW_GCS" enableGcs)
    (lib.cmakeBool "ARROW_ORC" true)
    # Parquet options:
    (lib.cmakeBool "ARROW_PARQUET" true)
    (lib.cmakeBool "PARQUET_BUILD_EXECUTABLES" true)
    (lib.cmakeBool "PARQUET_REQUIRE_ENCRYPTION" true)
  ]
  ++ lib.optionals (!enableShared) [
    (lib.cmakeFeature "ARROW_TEST_LINKAGE" "static")
  ]
  ++ lib.optionals stdenv.hostPlatform.isDarwin [
    # needed for tools executables
    (lib.cmakeFeature "CMAKE_INSTALL_RPATH" "@loader_path/../lib")
  ]
  ++ lib.optionals (!stdenv.hostPlatform.isx86_64) [
    (lib.cmakeBool "ARROW_USE_SIMD" false)
  ]
  ++ lib.optionals enableS3 [
    (lib.cmakeFeature "AWSSDK_CORE_HEADER_FILE" "${aws-sdk-cpp-arrow}/include/aws/core/Aws.h")
  ];

  doInstallCheck = true;

  # The three variables below are empty strings when doInstallCheck is overridden to false.
  ARROW_TEST_DATA = lib.optionalString finalAttrs.doInstallCheck "${arrow-testing}/data";
  PARQUET_TEST_DATA = lib.optionalString finalAttrs.doInstallCheck "${parquet-testing}/data";

  # Colon-separated list of test patterns behind a leading "-", which is the
  # GoogleTest filter syntax for excluding tests.
  GTEST_FILTER =
    let
      # Upstream Issue: https://issues.apache.org/jira/browse/ARROW-11398
      filteredTests =
        lib.optionals stdenv.hostPlatform.isAarch64 [
          "TestFilterKernelWithNumeric/3.CompareArrayAndFilterRandomNumeric"
          "TestFilterKernelWithNumeric/7.CompareArrayAndFilterRandomNumeric"
          "TestCompareKernel.PrimitiveRandomTests"
        ]
        ++ lib.optionals enableS3 [
          "S3OptionsTest.FromUri"
          "S3RegionResolutionTest.NonExistentBucket"
          "S3RegionResolutionTest.PublicBucket"
          "S3RegionResolutionTest.RestrictedBucket"
          "TestMinioServer.Connect"
          "TestS3FS.*"
          "TestS3FSGeneric.*"
        ]
        ++ lib.optionals stdenv.hostPlatform.isDarwin [
          # TODO: revisit at 12.0.0 or when
          # https://github.com/apache/arrow/commit/295c6644ca6b67c95a662410b2c7faea0920c989
          # is available, see
          # https://github.com/apache/arrow/pull/15288#discussion_r1071244661
          # NOTE(review): the package is now at a version well past 12.0.0;
          # confirm whether this exclusion is still required.
          "ExecPlanExecution.StressSourceSinkStopped"
        ];
    in
    lib.optionalString finalAttrs.doInstallCheck "-${lib.concatStringsSep ":" filteredTests}";

  __darwinAllowLocalNetworking = true;

  nativeInstallCheckInputs = [
    perl
    which
    sqlite
  ]
  ++ lib.optionals enableS3 [ minio ]
  ++ lib.optionals enableFlight [ python3 ];

  # Run the ctest tests labelled "unittest", skipping the test programs named
  # in disabledTests (matched as whole names by the anchored regex).
  installCheckPhase =
    let
      disabledTests = [
        # flaky
        "arrow-flight-test"
        # requires networking
        "arrow-gcsfs-test"
        "arrow-flight-integration-test"
        # File already exists in database: orc_proto.proto
        "arrow-orc-adapter-test"
      ];
    in
    ''
      runHook preInstallCheck

      ctest -L unittest --exclude-regex '^(${lib.concatStringsSep "|" disabledTests})$'

      runHook postInstallCheck
    '';

  meta = {
    description = "Cross-language development platform for in-memory data";
    homepage = "https://arrow.apache.org/docs/cpp/";
    license = lib.licenses.asl20;
    platforms = lib.platforms.unix;
    maintainers = with lib.maintainers; [
      tobim
      veprbl
      cpcloud
    ];
    # Checked by passthru.tests.pkg-config below.
    pkgConfigModules = [
      "arrow"
      "arrow-acero"
      "arrow-compute"
      "arrow-csv"
      "arrow-dataset"
      "arrow-filesystem"
      "arrow-flight"
      "arrow-flight-sql"
      "arrow-flight-testing"
      "arrow-json"
      "arrow-substrait"
      "arrow-testing"
      "parquet"
    ];
  };

  passthru = {
    # Expose the feature toggles on the resulting package.
    inherit
      enableFlight
      enableJemalloc
      enableS3
      enableGcs
      ;
    tests.pkg-config = testers.testMetaPkgConfig finalAttrs.finalPackage;
  };
})