nixpkgs mirror (for testing) github.com/NixOS/nixpkgs
nix
at python-updates 345 lines 9.5 kB view raw
# This is arrow-cpp < 20, used as a workaround for
# Ceph not supporting >= 20 yet, taken from nixpkgs commit
# 97ae53798f6a7c7c3c259ad8c2cbcede6ca34b2a~
# This should be entirely removed when upstream bug
# https://tracker.ceph.com/issues/71269
# is fixed.
#
# Package function arguments:
#   aws-sdk-cpp-arrow  aws-sdk-cpp restricted to the API set listed below;
#                      overridable by callers.
#   enableShared       build shared libraries (default: host is not static).
#   enableFlight       build Flight / Flight SQL (default: native builds only).
#   enableJemalloc     build the vendored jemalloc allocator.
#   enableS3           build the S3 filesystem.
#   enableGcs          build the Google Cloud Storage filesystem.
{
  stdenv,
  lib,
  fetchurl,
  fetchpatch2,
  fetchFromGitHub,
  fixDarwinDylibNames,
  autoconf,
  aws-sdk-cpp,
  aws-sdk-cpp-arrow ? aws-sdk-cpp.override {
    apis = [
      "cognito-identity"
      "config"
      "identity-management"
      "s3"
      "sts"
      "transfer"
    ];
  },
  boost,
  brotli,
  bzip2,
  cmake,
  crc32c,
  curl,
  flatbuffers,
  gflags,
  glog,
  google-cloud-cpp,
  grpc,
  gtest,
  libbacktrace,
  lz4,
  minio,
  ninja,
  nlohmann_json,
  openssl,
  perl,
  pkg-config,
  protobuf_32,
  python3,
  rapidjson,
  re2,
  snappy,
  sqlite,
  thrift,
  tzdata,
  utf8proc,
  which,
  zlib,
  zstd,
  testers,
  enableShared ? !stdenv.hostPlatform.isStatic,
  enableFlight ? stdenv.buildPlatform == stdenv.hostPlatform,
  # Disable also on RiscV
  # configure: error: cannot determine number of significant virtual address bits
  enableJemalloc ?
    !stdenv.hostPlatform.isDarwin && !stdenv.hostPlatform.isAarch64 && !stdenv.hostPlatform.isRiscV64,
  enableS3 ? true,
  # google-cloud-cpp fails to build on RiscV
  enableGcs ? !stdenv.hostPlatform.isDarwin && !stdenv.hostPlatform.isRiscV64,
}:

let
  # Test fixtures; exposed to the test suite through ARROW_TEST_DATA below.
  arrow-testing = fetchFromGitHub {
    name = "arrow-testing";
    owner = "apache";
    repo = "arrow-testing";
    rev = "4d209492d514c2d3cb2d392681b9aa00e6d8da1c";
    hash = "sha256-IkiCbuy0bWyClPZ4ZEdkEP7jFYLhM7RCuNLd6Lazd4o=";
  };

  # Test fixtures; exposed to the test suite through PARQUET_TEST_DATA below.
  parquet-testing = fetchFromGitHub {
    name = "parquet-testing";
    owner = "apache";
    repo = "parquet-testing";
    rev = "c7cf1374cf284c0c73024cd1437becea75558bf8";
    hash = "sha256-DThjyZ34LajHwXZy1IhYKUGUG/ejQ9WvBNuI8eUKmSs=";
  };

  version = "19.0.1";
in
stdenv.mkDerivation (finalAttrs: {
  pname = "arrow-cpp";
  inherit version;

  src = fetchFromGitHub {
    owner = "apache";
    repo = "arrow";
    rev = "apache-arrow-${version}";
    hash = "sha256-toHwUIOZRpgR0K7pQtT5nqWpO9G7AuHYTcvA6UVg9lA=";
  };

  # Only the C++ tree of the Arrow monorepo is built.
  sourceRoot = "${finalAttrs.src.name}/cpp";

  patches = [
    (fetchpatch2 {
      name = "protobuf-30-compat.patch";
      url = "https://github.com/apache/arrow/pull/46136.patch";
      hash = "sha256-WTpe/eT3himlCHN/R78w1sF0HG859mE2ZN70U+9N8Ag=";
      stripLen = 1;
    })
    (fetchpatch2 {
      name = "cmake-fix.patch";
      url = "https://github.com/apache/arrow/commit/48c0bbbd4a2eedcca518caeb7f7547c7988dc740.patch?full_index=1";
      hash = "sha256-i/vZy/61VYP+mo1AxfoiBSjTip04vhFOh3hGjHCJy6g=";
      stripLen = 1; # applying patch from within `cpp/` subdirectory
    })
  ];

  # versions are all taken from
  # https://github.com/apache/arrow/blob/apache-arrow-${version}/cpp/thirdparty/versions.txt

  # jemalloc: arrow uses a custom prefix to prevent default allocator symbol
  # collisions as well as custom build flags
  # (a `null` attribute name makes Nix drop the attribute when jemalloc is disabled)
  ${if enableJemalloc then "ARROW_JEMALLOC_URL" else null} = fetchurl {
    url = "https://github.com/jemalloc/jemalloc/releases/download/5.3.0/jemalloc-5.3.0.tar.bz2";
    hash = "sha256-LbgtHnEZ3z5xt2QCGbbf6EeJvAU3mDw7esT3GJrs/qo=";
  };

  # mimalloc: arrow uses custom build flags for mimalloc
  ARROW_MIMALLOC_URL = fetchFromGitHub {
    owner = "microsoft";
    repo = "mimalloc";
    rev = "v2.0.6";
    hash = "sha256-u2ITXABBN/dwU+mCIbL3tN1f4c17aBuSdNTV+Adtohc=";
  };

  ARROW_XSIMD_URL = fetchFromGitHub {
    owner = "xtensor-stack";
    repo = "xsimd";
    rev = "13.0.0";
    hash = "sha256-qElJYW5QDj3s59L3NgZj5zkhnUMzIP2mBa1sPks3/CE=";
  };

  ARROW_SUBSTRAIT_URL = fetchFromGitHub {
    owner = "substrait-io";
    repo = "substrait";
    rev = "v0.44.0";
    hash = "sha256-V739IFTGPtbGPlxcOi8sAaYSDhNUEpITvN9IqdPReug=";
  };

  nativeBuildInputs = [
    cmake
    pkg-config
    ninja
    autoconf # for vendored jemalloc
    flatbuffers
  ]
  ++ lib.optional stdenv.hostPlatform.isDarwin fixDarwinDylibNames;
  buildInputs = [
    boost
    brotli
    bzip2
    flatbuffers
    gflags
    glog
    gtest
    libbacktrace
    lz4
    nlohmann_json # alternative JSON parser to rapidjson
    protobuf_32 # substrait requires protobuf
    rapidjson
    re2
    snappy
    thrift
    utf8proc
    zlib
    zstd
  ]
  ++ lib.optionals enableFlight [
    grpc
    openssl
    sqlite
  ]
  ++ lib.optionals enableS3 [
    aws-sdk-cpp-arrow
    openssl
  ]
  ++ lib.optionals enableGcs [
    crc32c
    curl
    google-cloud-cpp
    grpc
    nlohmann_json
  ];

  # Replace the call to discover_tz_dir() in the vendored datetime library
  # with the fixed zoneinfo path from the tzdata package.
  preConfigure = ''
    patchShebangs build-support/
    substituteInPlace "src/arrow/vendored/datetime/tz.cpp" \
      --replace-fail 'discover_tz_dir();' '"${tzdata}/share/zoneinfo";'
  '';

  # NOTE(review): tests are only built for shared builds (ARROW_BUILD_TESTS),
  # yet doInstallCheck is unconditionally true and ARROW_TEST_LINKAGE=static
  # is passed for static builds; confirm this combination is intended.
  cmakeFlags = [
    "-DCMAKE_FIND_PACKAGE_PREFER_CONFIG=ON"
    "-DARROW_BUILD_SHARED=${if enableShared then "ON" else "OFF"}"
    "-DARROW_BUILD_STATIC=${if enableShared then "OFF" else "ON"}"
    "-DARROW_BUILD_TESTS=${if enableShared then "ON" else "OFF"}"
    "-DARROW_BUILD_INTEGRATION=ON"
    "-DARROW_BUILD_UTILITIES=ON"
    "-DARROW_EXTRA_ERROR_CONTEXT=ON"
    "-DARROW_VERBOSE_THIRDPARTY_BUILD=ON"
    "-DARROW_DEPENDENCY_SOURCE=SYSTEM"
    "-Dxsimd_SOURCE=AUTO"
    "-DARROW_DEPENDENCY_USE_SHARED=${if enableShared then "ON" else "OFF"}"
    "-DARROW_COMPUTE=ON"
    "-DARROW_CSV=ON"
    "-DARROW_DATASET=ON"
    "-DARROW_FILESYSTEM=ON"
    "-DARROW_FLIGHT_SQL=${if enableFlight then "ON" else "OFF"}"
    "-DARROW_HDFS=ON"
    "-DARROW_IPC=ON"
    "-DARROW_JEMALLOC=${if enableJemalloc then "ON" else "OFF"}"
    "-DARROW_JSON=ON"
    "-DARROW_USE_GLOG=ON"
    "-DARROW_WITH_BACKTRACE=ON"
    "-DARROW_WITH_BROTLI=ON"
    "-DARROW_WITH_BZ2=ON"
    "-DARROW_WITH_LZ4=ON"
    "-DARROW_WITH_NLOHMANN_JSON=ON"
    "-DARROW_WITH_SNAPPY=ON"
    "-DARROW_WITH_UTF8PROC=ON"
    "-DARROW_WITH_ZLIB=ON"
    "-DARROW_WITH_ZSTD=ON"
    "-DARROW_MIMALLOC=ON"
    "-DARROW_SUBSTRAIT=ON"
    "-DARROW_FLIGHT=${if enableFlight then "ON" else "OFF"}"
    "-DARROW_FLIGHT_TESTING=${if enableFlight then "ON" else "OFF"}"
    "-DARROW_S3=${if enableS3 then "ON" else "OFF"}"
    "-DARROW_GCS=${if enableGcs then "ON" else "OFF"}"
    # Parquet options:
    "-DARROW_PARQUET=ON"
    "-DPARQUET_BUILD_EXECUTABLES=ON"
    "-DPARQUET_REQUIRE_ENCRYPTION=ON"
  ]
  ++ lib.optionals (!enableShared) [ "-DARROW_TEST_LINKAGE=static" ]
  ++ lib.optionals stdenv.hostPlatform.isDarwin [
    "-DCMAKE_INSTALL_RPATH=@loader_path/../lib" # needed for tools executables
  ]
  ++ lib.optionals (!stdenv.hostPlatform.isx86_64) [ "-DARROW_USE_SIMD=OFF" ]
  ++ lib.optionals enableS3 [
    "-DAWSSDK_CORE_HEADER_FILE=${aws-sdk-cpp-arrow}/include/aws/core/Aws.h"
  ];

  doInstallCheck = true;
  # The test-data paths and the gtest filter are only populated when the
  # install check is enabled; otherwise they are empty strings.
  ARROW_TEST_DATA = lib.optionalString finalAttrs.doInstallCheck "${arrow-testing}/data";
  PARQUET_TEST_DATA = lib.optionalString finalAttrs.doInstallCheck "${parquet-testing}/data";
  GTEST_FILTER =
    let
      # Upstream Issue: https://issues.apache.org/jira/browse/ARROW-11398
      filteredTests =
        lib.optionals stdenv.hostPlatform.isAarch64 [
          "TestFilterKernelWithNumeric/3.CompareArrayAndFilterRandomNumeric"
          "TestFilterKernelWithNumeric/7.CompareArrayAndFilterRandomNumeric"
          "TestCompareKernel.PrimitiveRandomTests"
        ]
        ++ lib.optionals enableS3 [
          "S3OptionsTest.FromUri"
          "S3RegionResolutionTest.NonExistentBucket"
          "S3RegionResolutionTest.PublicBucket"
          "S3RegionResolutionTest.RestrictedBucket"
          "TestMinioServer.Connect"
          "TestS3FS.*"
          "TestS3FSGeneric.*"
        ]
        ++ lib.optionals stdenv.hostPlatform.isDarwin [
          # TODO: revisit at 12.0.0 or when
          # https://github.com/apache/arrow/commit/295c6644ca6b67c95a662410b2c7faea0920c989
          # is available, see
          # https://github.com/apache/arrow/pull/15288#discussion_r1071244661
          # NOTE(review): this expression is already at 19.0.1, so the
          # revisit point above has passed; confirm whether the exclusion is
          # still required.
          "ExecPlanExecution.StressSourceSinkStopped"
        ];
    in
    # Leading "-" turns the colon-separated list into a gtest exclusion filter.
    lib.optionalString finalAttrs.doInstallCheck "-${lib.concatStringsSep ":" filteredTests}";

  __darwinAllowLocalNetworking = true;

  nativeInstallCheckInputs = [
    perl
    which
    sqlite
  ]
  ++ lib.optionals enableS3 [ minio ]
  ++ lib.optionals enableFlight [ python3 ];

  installCheckPhase =
    let
      # ctest test names excluded as whole binaries (anchored regex below).
      disabledTests = [
        # flaky
        "arrow-flight-test"
        # requires networking
        "arrow-gcsfs-test"
        "arrow-flight-integration-test"
      ];
    in
    ''
      runHook preInstallCheck

      ctest -L unittest --exclude-regex '^(${lib.concatStringsSep "|" disabledTests})$'

      runHook postInstallCheck
    '';

  meta = {
    description = "Cross-language development platform for in-memory data";
    homepage = "https://arrow.apache.org/docs/cpp/";
    license = lib.licenses.asl20;
    platforms = lib.platforms.unix;
    maintainers = with lib.maintainers; [
      tobim
      veprbl
      cpcloud
    ];
    pkgConfigModules = [
      "arrow"
      "arrow-acero"
      "arrow-compute"
      "arrow-csv"
      "arrow-dataset"
      "arrow-filesystem"
    ]
    # ARROW_FLIGHT, ARROW_FLIGHT_SQL and ARROW_FLIGHT_TESTING are OFF when
    # enableFlight is false (see cmakeFlags), so only advertise the Flight
    # modules when they are actually built; otherwise tests.pkg-config would
    # look for modules this build does not provide.
    ++ lib.optionals enableFlight [
      "arrow-flight"
      "arrow-flight-sql"
      "arrow-flight-testing"
    ]
    ++ [
      "arrow-json"
      "arrow-substrait"
      "arrow-testing"
      "parquet"
    ];
  };
  passthru = {
    # Expose the feature toggles so dependants can inspect how this was built.
    inherit
      enableFlight
      enableJemalloc
      enableS3
      enableGcs
      ;
    tests.pkg-config = testers.testMetaPkgConfig finalAttrs.finalPackage;
  };
})