nixpkgs mirror (for testing) github.com/NixOS/nixpkgs
nix
at 22.05 259 lines 7.7 kB view raw
# Apache Arrow C++ libraries, built with CMake from the `cpp/` subdirectory of
# the Apache Arrow release tarball.
#
# Feature toggles (all have defaults derived from the platform):
#   enableShared   - build shared libraries and the Python bindings; when
#                    false, static libraries are built and Python is disabled.
#   enableFlight   - build Arrow Flight / Flight SQL (pulls in grpc, openssl,
#                    protobuf).
#   enableJemalloc - build with jemalloc, fed from `jemalloc.src`.
#   enableS3       - build the S3 filesystem (pulls in aws-sdk-cpp, openssl).
#   enableGcs      - build the GCS filesystem (pulls in google-cloud-cpp & co).
{ stdenv
, lib
, fetchurl
, fetchFromGitHub
, fixDarwinDylibNames
, abseil-cpp
, autoconf
, aws-sdk-cpp
, boost
, brotli
, c-ares
, cmake
, crc32c
, curl
, flatbuffers
, gflags
, glog
, google-cloud-cpp
, grpc
, gtest
, jemalloc
, libbacktrace
, lz4
, minio
, ninja
, nlohmann_json
, openssl
, perl
, protobuf
, python3
, rapidjson
, re2
, snappy
, sqlite
, thrift
, tzdata
, utf8proc
, which
, zlib
, zstd
, enableShared ? !stdenv.hostPlatform.isStatic
, enableFlight ? true
, enableJemalloc ? !(stdenv.isAarch64 && stdenv.isDarwin)
  # boost/process is broken in 1.69 on darwin, but fixed in 1.70 and
  # non-existent in older versions
  # see https://github.com/boostorg/process/issues/55
, enableS3 ? (!stdenv.isDarwin) || (lib.versionOlder boost.version "1.69" || lib.versionAtLeast boost.version "1.70")
, enableGcs ? !stdenv.isDarwin # google-cloud-cpp is not supported on darwin
}:

let
  # Render a boolean as the ON/OFF value used by the CMake options below.
  onOff = flag: if flag then "ON" else "OFF";

  # True for every Boost version outside the 1.69 series.
  boostIsNot169 =
    lib.versionOlder boost.version "1.69"
    || lib.versionAtLeast boost.version "1.70";

  # Data repositories exported to the test suite through ARROW_TEST_DATA and
  # PARQUET_TEST_DATA.
  arrowTestRepo = fetchFromGitHub {
    owner = "apache";
    repo = "arrow-testing";
    rev = "634739c664433cec366b4b9a81d1e1044a8c5eda";
    hash = "sha256-r1WVgJJsI7v485L6Qb+5i7kFO4Tvxyk1T0JBb4og6pg=";
  };

  parquetTestRepo = fetchFromGitHub {
    owner = "apache";
    repo = "parquet-testing";
    rev = "acd375eb86a81cd856476fca0f52ba6036a067ff";
    hash = "sha256-z/kmi+4dBO/dsVkJA4NgUoxl0pXi8RWIGvI8MGu/gcc=";
  };

  # googletest cases removed from the run via the negative GTEST_FILTER.
  # Upstream Issue: https://issues.apache.org/jira/browse/ARROW-11398
  skippedGtests =
    lib.optionals stdenv.hostPlatform.isAarch64 [
      "TestFilterKernelWithNumeric/3.CompareArrayAndFilterRandomNumeric"
      "TestFilterKernelWithNumeric/7.CompareArrayAndFilterRandomNumeric"
      "TestCompareKernel.PrimitiveRandomTests"
    ]
    ++ lib.optionals enableS3 [
      "S3OptionsTest.FromUri"
      "S3RegionResolutionTest.NonExistentBucket"
      "S3RegionResolutionTest.PublicBucket"
      "S3RegionResolutionTest.RestrictedBucket"
      "TestMinioServer.Connect"
      "TestS3FS.*"
      "TestS3FSGeneric.*"
    ];

  # ctest targets excluded from the install check.
  skippedCtests =
    lib.optionals stdenv.isDarwin [
      # Some plasma tests need to be patched to use a shorter AF_UNIX socket
      # path on Darwin. See https://github.com/NixOS/nix/pull/1085
      "plasma-external-store-tests"
      "plasma-client-tests"
    ]
    ++ [ "arrow-gcsfs-test" ];

  # Anchored alternation handed to `ctest --exclude-regex`.
  ctestExcludeRegex = "^(${lib.concatStringsSep "|" skippedCtests})$";
in

# Refuse to evaluate an S3-enabled Darwin build against Boost 1.69.
assert lib.asserts.assertMsg
  ((enableS3 && stdenv.isDarwin) -> boostIsNot169)
  "S3 on Darwin requires Boost != 1.69";

stdenv.mkDerivation rec {
  pname = "arrow-cpp";
  version = "8.0.0";

  src = fetchurl {
    url = "mirror://apache/arrow/arrow-${version}/apache-arrow-${version}.tar.gz";
    hash = "sha256-rZoFcFEXyYnBFrrprHBJL+AVBQ4bgPsOOP3ktdhjqqM=";
  };
  # Only the C++ tree of the release tarball is built.
  sourceRoot = "apache-arrow-${version}/cpp";

  # A `null` attribute name is dropped, so this variable is only set when
  # jemalloc is enabled.
  ${if enableJemalloc then "ARROW_JEMALLOC_URL" else null} = jemalloc.src;

  # The pinned revisions below follow
  # https://github.com/apache/arrow/blob/apache-arrow-8.0.0/cpp/thirdparty/versions.txt

  ARROW_MIMALLOC_URL = fetchFromGitHub {
    owner = "microsoft";
    repo = "mimalloc";
    rev = "v1.7.3";
    hash = "sha256-Ca877VitpWyKmZNHavqgewk/P+tyd2xHDNVqveKh87M=";
  };

  ARROW_XSIMD_URL = fetchFromGitHub {
    owner = "xtensor-stack";
    repo = "xsimd";
    rev = "7d1778c3b38d63db7cec7145d939f40bc5d859d1";
    hash = "sha256-89AysBUVnTdWyMPazeJegnQ6WEH90Ns7qQInZLMSXY4=";
  };

  ARROW_SUBSTRAIT_URL = fetchFromGitHub {
    owner = "substrait-io";
    repo = "substrait";
    rev = "e1b4c04a1b518912f4c4065b16a1b2c0ac8e14cf";
    hash = "sha256-56FSjDngsROSHLjMv+OYAIYqphEu3GzgIMHbgh/ZQw0=";
  };

  patches = [
    # patch to fix python-test
    ./darwin.patch
  ];

  nativeBuildInputs = [
    cmake
    ninja
    autoconf # for vendored jemalloc
    flatbuffers
  ]
  ++ lib.optional stdenv.isDarwin fixDarwinDylibNames;

  buildInputs = [
    boost
    brotli
    flatbuffers
    gflags
    glog
    gtest
    libbacktrace
    lz4
    nlohmann_json # alternative JSON parser to rapidjson
    protobuf # substrait requires protobuf
    rapidjson
    re2
    snappy
    thrift
    utf8proc
    zlib
    zstd
  ]
  ++ lib.optionals enableShared [
    python3.pkgs.python
    python3.pkgs.numpy
  ]
  ++ lib.optionals enableFlight [
    grpc
    openssl
    protobuf
  ]
  ++ lib.optionals enableS3 [
    aws-sdk-cpp
    openssl
  ]
  ++ lib.optionals enableGcs [
    abseil-cpp
    crc32c
    curl
    google-cloud-cpp
    nlohmann_json
  ];

  # Replace the vendored date library's `discover_tz_dir()` call with the
  # zoneinfo directory shipped by the tzdata package.
  preConfigure = ''
    patchShebangs build-support/
    substituteInPlace "src/arrow/vendored/datetime/tz.cpp" \
      --replace 'discover_tz_dir();' '"${tzdata}/share/zoneinfo";'
  '';

  cmakeFlags = [
    "-DCMAKE_FIND_PACKAGE_PREFER_CONFIG=ON"
    "-DARROW_BUILD_SHARED=${onOff enableShared}"
    "-DARROW_BUILD_STATIC=${onOff (!enableShared)}"
    "-DARROW_BUILD_TESTS=ON"
    "-DARROW_BUILD_INTEGRATION=ON"
    "-DARROW_BUILD_UTILITIES=ON"
    "-DARROW_EXTRA_ERROR_CONTEXT=ON"
    "-DARROW_VERBOSE_THIRDPARTY_BUILD=ON"
    "-DARROW_DEPENDENCY_SOURCE=SYSTEM"
    "-DThrift_SOURCE=AUTO" # search for Thrift using pkg-config (ThriftConfig.cmake requires OpenSSL and libevent)
    "-DARROW_DEPENDENCY_USE_SHARED=${onOff enableShared}"
    "-DARROW_COMPUTE=ON"
    "-DARROW_CSV=ON"
    "-DARROW_DATASET=ON"
    "-DARROW_ENGINE=ON"
    "-DARROW_FILESYSTEM=ON"
    "-DARROW_FLIGHT_SQL=${onOff enableFlight}"
    "-DARROW_HDFS=ON"
    "-DARROW_IPC=ON"
    "-DARROW_JEMALLOC=${onOff enableJemalloc}"
    "-DARROW_JSON=ON"
    "-DARROW_PLASMA=ON"
    # Disable Python for static mode because openblas is currently broken there.
    "-DARROW_PYTHON=${onOff enableShared}"
    "-DARROW_USE_GLOG=ON"
    "-DARROW_WITH_BACKTRACE=ON"
    "-DARROW_WITH_BROTLI=ON"
    "-DARROW_WITH_LZ4=ON"
    "-DARROW_WITH_NLOHMANN_JSON=ON"
    "-DARROW_WITH_SNAPPY=ON"
    "-DARROW_WITH_UTF8PROC=ON"
    "-DARROW_WITH_ZLIB=ON"
    "-DARROW_WITH_ZSTD=ON"
    "-DARROW_MIMALLOC=ON"
    # Parquet options:
    "-DARROW_PARQUET=ON"
    "-DARROW_SUBSTRAIT=ON"
    "-DPARQUET_BUILD_EXECUTABLES=ON"
    "-DARROW_FLIGHT=${onOff enableFlight}"
    "-DARROW_FLIGHT_TESTING=${onOff enableFlight}"
    "-DARROW_S3=${onOff enableS3}"
    "-DARROW_GCS=${onOff enableGcs}"
  ]
  ++ lib.optional (!enableShared) "-DARROW_TEST_LINKAGE=static"
  ++ lib.optionals stdenv.isDarwin [
    "-DCMAKE_SKIP_BUILD_RPATH=OFF" # needed for tests
    "-DCMAKE_INSTALL_RPATH=@loader_path/../lib" # needed for tools executables
  ]
  ++ lib.optional (!stdenv.isx86_64) "-DARROW_USE_SIMD=OFF"
  ++ lib.optional enableS3 "-DAWSSDK_CORE_HEADER_FILE=${aws-sdk-cpp}/include/aws/core/Aws.h";

  # The unit tests run as the install check; the three environment variables
  # below collapse to the empty string when the check is turned off.
  doInstallCheck = true;
  ARROW_TEST_DATA = lib.optionalString doInstallCheck "${arrowTestRepo}/data";
  PARQUET_TEST_DATA = lib.optionalString doInstallCheck "${parquetTestRepo}/data";
  # The leading "-" makes every listed pattern an exclusion.
  GTEST_FILTER = lib.optionalString doInstallCheck "-${lib.concatStringsSep ":" skippedGtests}";

  installCheckInputs = [
    perl
    which
    sqlite
  ]
  ++ lib.optional enableS3 minio;

  installCheckPhase = ''
    runHook preInstallCheck

    ctest -L unittest \
      --exclude-regex '${ctestExcludeRegex}'

    runHook postInstallCheck
  '';

  meta = {
    description = "A cross-language development platform for in-memory data";
    homepage = "https://arrow.apache.org/";
    license = lib.licenses.asl20;
    platforms = lib.platforms.unix;
    maintainers = with lib.maintainers; [ tobim veprbl cpcloud ];
  };

  # Expose the effective feature toggles on the resulting package.
  passthru = {
    inherit enableFlight enableJemalloc enableS3 enableGcs;
  };
}