Clone of https://github.com/NixOS/nixpkgs.git (to stress-test knotserver)
at master 340 lines 9.2 kB view raw
# This is arrow-cpp < 20, used as a workaround for
# Ceph not supporting >= 20 yet, taken from nixpkgs commit
# 97ae53798f6a7c7c3c259ad8c2cbcede6ca34b2a~
# This should be entirely removed when upstream bug
# https://tracker.ceph.com/issues/71269
# is fixed.
#
# Arguments of note:
#   aws-sdk-cpp-arrow  aws-sdk-cpp restricted to the API subset listed below.
#   enableShared       build shared libraries (and the test suite); otherwise static.
#   enableFlight       build Arrow Flight / Flight SQL; off by default when cross-compiling.
#   enableJemalloc     build the vendored jemalloc allocator.
#   enableS3           build the S3 filesystem.
#   enableGcs          build the Google Cloud Storage filesystem.
{
  stdenv,
  lib,
  fetchurl,
  fetchpatch2,
  fetchFromGitHub,
  fixDarwinDylibNames,
  autoconf,
  aws-sdk-cpp,
  aws-sdk-cpp-arrow ? aws-sdk-cpp.override {
    apis = [
      "cognito-identity"
      "config"
      "identity-management"
      "s3"
      "sts"
      "transfer"
    ];
  },
  boost,
  brotli,
  bzip2,
  cmake,
  crc32c,
  curl,
  flatbuffers,
  gflags,
  glog,
  google-cloud-cpp,
  grpc,
  gtest,
  libbacktrace,
  lz4,
  minio,
  ninja,
  nlohmann_json,
  openssl,
  perl,
  pkg-config,
  protobuf,
  python3,
  rapidjson,
  re2,
  snappy,
  sqlite,
  thrift,
  tzdata,
  utf8proc,
  which,
  zlib,
  zstd,
  testers,
  enableShared ? !stdenv.hostPlatform.isStatic,
  enableFlight ? stdenv.buildPlatform == stdenv.hostPlatform,
  # Disable also on RiscV
  # configure: error: cannot determine number of significant virtual address bits
  enableJemalloc ?
    !stdenv.hostPlatform.isDarwin && !stdenv.hostPlatform.isAarch64 && !stdenv.hostPlatform.isRiscV64,
  enableS3 ? true,
  # google-cloud-cpp fails to build on RiscV
  enableGcs ? !stdenv.hostPlatform.isDarwin && !stdenv.hostPlatform.isRiscV64,
}:

let
  # Test fixtures; exposed to the test suite through ARROW_TEST_DATA below.
  arrow-testing = fetchFromGitHub {
    name = "arrow-testing";
    owner = "apache";
    repo = "arrow-testing";
    rev = "4d209492d514c2d3cb2d392681b9aa00e6d8da1c";
    hash = "sha256-IkiCbuy0bWyClPZ4ZEdkEP7jFYLhM7RCuNLd6Lazd4o=";
  };

  # Test fixtures; exposed to the test suite through PARQUET_TEST_DATA below.
  parquet-testing = fetchFromGitHub {
    name = "parquet-testing";
    owner = "apache";
    repo = "parquet-testing";
    rev = "c7cf1374cf284c0c73024cd1437becea75558bf8";
    hash = "sha256-DThjyZ34LajHwXZy1IhYKUGUG/ejQ9WvBNuI8eUKmSs=";
  };

  version = "19.0.1";
in
stdenv.mkDerivation (finalAttrs: {
  pname = "arrow-cpp";
  inherit version;

  src = fetchFromGitHub {
    owner = "apache";
    repo = "arrow";
    rev = "apache-arrow-${version}";
    hash = "sha256-toHwUIOZRpgR0K7pQtT5nqWpO9G7AuHYTcvA6UVg9lA=";
  };

  # Only the cpp/ subdirectory of the arrow repository is built here.
  sourceRoot = "${finalAttrs.src.name}/cpp";

  patches = [
    (fetchpatch2 {
      name = "protobuf-30-compat.patch";
      url = "https://github.com/apache/arrow/pull/46136.patch";
      hash = "sha256-WTpe/eT3himlCHN/R78w1sF0HG859mE2ZN70U+9N8Ag=";
      # The patch paths are relative to the repository root, while
      # sourceRoot is cpp/, so one extra leading component is stripped.
      stripLen = 1;
    })
  ];

  # versions are all taken from
  # https://github.com/apache/arrow/blob/apache-arrow-${version}/cpp/thirdparty/versions.txt

  # jemalloc: arrow uses a custom prefix to prevent default allocator symbol
  # collisions as well as custom build flags
  #
  # A dynamic attribute whose name evaluates to null is omitted by Nix, so
  # ARROW_JEMALLOC_URL is only defined when enableJemalloc is true.
  ${if enableJemalloc then "ARROW_JEMALLOC_URL" else null} = fetchurl {
    url = "https://github.com/jemalloc/jemalloc/releases/download/5.3.0/jemalloc-5.3.0.tar.bz2";
    hash = "sha256-LbgtHnEZ3z5xt2QCGbbf6EeJvAU3mDw7esT3GJrs/qo=";
  };

  # mimalloc: arrow uses custom build flags for mimalloc
  ARROW_MIMALLOC_URL = fetchFromGitHub {
    owner = "microsoft";
    repo = "mimalloc";
    rev = "v2.0.6";
    hash = "sha256-u2ITXABBN/dwU+mCIbL3tN1f4c17aBuSdNTV+Adtohc=";
  };

  ARROW_XSIMD_URL = fetchFromGitHub {
    owner = "xtensor-stack";
    repo = "xsimd";
    rev = "13.0.0";
    hash = "sha256-qElJYW5QDj3s59L3NgZj5zkhnUMzIP2mBa1sPks3/CE=";
  };

  ARROW_SUBSTRAIT_URL = fetchFromGitHub {
    owner = "substrait-io";
    repo = "substrait";
    rev = "v0.44.0";
    hash = "sha256-V739IFTGPtbGPlxcOi8sAaYSDhNUEpITvN9IqdPReug=";
  };

  nativeBuildInputs = [
    cmake
    pkg-config
    ninja
    autoconf # for vendored jemalloc
    flatbuffers
  ]
  ++ lib.optional stdenv.hostPlatform.isDarwin fixDarwinDylibNames;
  buildInputs = [
    boost
    brotli
    bzip2
    flatbuffers
    gflags
    glog
    gtest
    libbacktrace
    lz4
    nlohmann_json # alternative JSON parser to rapidjson
    protobuf # substrait requires protobuf
    rapidjson
    re2
    snappy
    thrift
    utf8proc
    zlib
    zstd
  ]
  ++ lib.optionals enableFlight [
    grpc
    openssl
    protobuf
    sqlite
  ]
  ++ lib.optionals enableS3 [
    aws-sdk-cpp-arrow
    openssl
  ]
  ++ lib.optionals enableGcs [
    crc32c
    curl
    google-cloud-cpp
    grpc
    nlohmann_json
  ];

  # Replace the vendored tz library's runtime zoneinfo discovery with the
  # fixed tzdata store path; --replace-fail aborts the build if the call
  # site is no longer present.
  preConfigure = ''
    patchShebangs build-support/
    substituteInPlace "src/arrow/vendored/datetime/tz.cpp" \
      --replace-fail 'discover_tz_dir();' '"${tzdata}/share/zoneinfo";'
  '';

  cmakeFlags = [
    "-DCMAKE_FIND_PACKAGE_PREFER_CONFIG=ON"
    "-DARROW_BUILD_SHARED=${if enableShared then "ON" else "OFF"}"
    "-DARROW_BUILD_STATIC=${if enableShared then "OFF" else "ON"}"
    # Note: the test suite is only built for shared builds.
    "-DARROW_BUILD_TESTS=${if enableShared then "ON" else "OFF"}"
    "-DARROW_BUILD_INTEGRATION=ON"
    "-DARROW_BUILD_UTILITIES=ON"
    "-DARROW_EXTRA_ERROR_CONTEXT=ON"
    "-DARROW_VERBOSE_THIRDPARTY_BUILD=ON"
    "-DARROW_DEPENDENCY_SOURCE=SYSTEM"
    "-Dxsimd_SOURCE=AUTO"
    "-DARROW_DEPENDENCY_USE_SHARED=${if enableShared then "ON" else "OFF"}"
    "-DARROW_COMPUTE=ON"
    "-DARROW_CSV=ON"
    "-DARROW_DATASET=ON"
    "-DARROW_FILESYSTEM=ON"
    "-DARROW_FLIGHT_SQL=${if enableFlight then "ON" else "OFF"}"
    "-DARROW_HDFS=ON"
    "-DARROW_IPC=ON"
    "-DARROW_JEMALLOC=${if enableJemalloc then "ON" else "OFF"}"
    "-DARROW_JSON=ON"
    "-DARROW_USE_GLOG=ON"
    "-DARROW_WITH_BACKTRACE=ON"
    "-DARROW_WITH_BROTLI=ON"
    "-DARROW_WITH_BZ2=ON"
    "-DARROW_WITH_LZ4=ON"
    "-DARROW_WITH_NLOHMANN_JSON=ON"
    "-DARROW_WITH_SNAPPY=ON"
    "-DARROW_WITH_UTF8PROC=ON"
    "-DARROW_WITH_ZLIB=ON"
    "-DARROW_WITH_ZSTD=ON"
    "-DARROW_MIMALLOC=ON"
    "-DARROW_SUBSTRAIT=ON"
    "-DARROW_FLIGHT=${if enableFlight then "ON" else "OFF"}"
    "-DARROW_FLIGHT_TESTING=${if enableFlight then "ON" else "OFF"}"
    "-DARROW_S3=${if enableS3 then "ON" else "OFF"}"
    "-DARROW_GCS=${if enableGcs then "ON" else "OFF"}"
    # Parquet options:
    "-DARROW_PARQUET=ON"
    "-DPARQUET_BUILD_EXECUTABLES=ON"
    "-DPARQUET_REQUIRE_ENCRYPTION=ON"
  ]
  ++ lib.optionals (!enableShared) [ "-DARROW_TEST_LINKAGE=static" ]
  ++ lib.optionals stdenv.hostPlatform.isDarwin [
    "-DCMAKE_INSTALL_RPATH=@loader_path/../lib" # needed for tools executables
  ]
  ++ lib.optionals (!stdenv.hostPlatform.isx86_64) [ "-DARROW_USE_SIMD=OFF" ]
  ++ lib.optionals enableS3 [
    "-DAWSSDK_CORE_HEADER_FILE=${aws-sdk-cpp-arrow}/include/aws/core/Aws.h"
  ];

  doInstallCheck = true;
  # The three variables below evaluate to the empty string when an override
  # sets doInstallCheck to false (they read it back through finalAttrs).
  ARROW_TEST_DATA = lib.optionalString finalAttrs.doInstallCheck "${arrow-testing}/data";
  PARQUET_TEST_DATA = lib.optionalString finalAttrs.doInstallCheck "${parquet-testing}/data";
  # The leading "-" makes the colon-separated list a GoogleTest exclusion
  # filter: everything runs except the patterns named here.
  GTEST_FILTER =
    let
      # Upstream Issue: https://issues.apache.org/jira/browse/ARROW-11398
      filteredTests =
        lib.optionals stdenv.hostPlatform.isAarch64 [
          "TestFilterKernelWithNumeric/3.CompareArrayAndFilterRandomNumeric"
          "TestFilterKernelWithNumeric/7.CompareArrayAndFilterRandomNumeric"
          "TestCompareKernel.PrimitiveRandomTests"
        ]
        ++ lib.optionals enableS3 [
          "S3OptionsTest.FromUri"
          "S3RegionResolutionTest.NonExistentBucket"
          "S3RegionResolutionTest.PublicBucket"
          "S3RegionResolutionTest.RestrictedBucket"
          "TestMinioServer.Connect"
          "TestS3FS.*"
          "TestS3FSGeneric.*"
        ]
        ++ lib.optionals stdenv.hostPlatform.isDarwin [
          # TODO: revisit at 12.0.0 or when
          # https://github.com/apache/arrow/commit/295c6644ca6b67c95a662410b2c7faea0920c989
          # is available, see
          # https://github.com/apache/arrow/pull/15288#discussion_r1071244661
          # NOTE(review): this file pins 19.0.1, so the 12.0.0 checkpoint has
          # passed; confirm whether this exclusion is still required.
          "ExecPlanExecution.StressSourceSinkStopped"
        ];
    in
    lib.optionalString finalAttrs.doInstallCheck "-${lib.concatStringsSep ":" filteredTests}";

  __darwinAllowLocalNetworking = true;

  nativeInstallCheckInputs = [
    perl
    which
    sqlite
  ]
  ++ lib.optionals enableS3 [ minio ]
  ++ lib.optionals enableFlight [ python3 ];

  # Runs the ctest targets labelled "unittest", skipping the whole test
  # binaries listed in disabledTests. The regex is anchored, so only exact
  # test names are excluded.
  installCheckPhase =
    let
      disabledTests = [
        # flaky
        "arrow-flight-test"
        # requires networking
        "arrow-gcsfs-test"
        "arrow-flight-integration-test"
      ];
    in
    ''
      runHook preInstallCheck

      ctest -L unittest --exclude-regex '^(${lib.concatStringsSep "|" disabledTests})$'

      runHook postInstallCheck
    '';

  meta = {
    description = "Cross-language development platform for in-memory data";
    homepage = "https://arrow.apache.org/docs/cpp/";
    license = lib.licenses.asl20;
    platforms = lib.platforms.unix;
    maintainers = with lib.maintainers; [
      tobim
      veprbl
      cpcloud
    ];
    pkgConfigModules = [
      "arrow"
      "arrow-acero"
      "arrow-compute"
      "arrow-csv"
      "arrow-dataset"
      "arrow-filesystem"
      "arrow-flight"
      "arrow-flight-sql"
      "arrow-flight-testing"
      "arrow-json"
      "arrow-substrait"
      "arrow-testing"
      "parquet"
    ];
  };
  passthru = {
    # Re-export the feature switches so dependents can inspect how this
    # package was configured.
    inherit
      enableFlight
      enableJemalloc
      enableS3
      enableGcs
      ;
    tests.pkg-config = testers.testMetaPkgConfig finalAttrs.finalPackage;
  };
})