at 23.05-pre 261 lines 7.8 kB view raw
# Apache Arrow C++ libraries (`arrow-cpp`), built from the `cpp/` directory of
# the upstream release tarball.
#
# Feature toggles:
#   enableShared   - build shared libraries (and the Python bindings); when
#                    false, static libraries are built instead.
#   enableFlight   - build Arrow Flight / Flight SQL (pulls in grpc, openssl
#                    and protobuf).
#   enableJemalloc - build with the vendored jemalloc allocator.
#   enableS3       - build the S3 filesystem (pulls in aws-sdk-cpp).
#   enableGcs      - build the GCS filesystem (pulls in google-cloud-cpp).
{ stdenv
, lib
, fetchurl
, fetchFromGitHub
, fixDarwinDylibNames
, autoconf
, aws-sdk-cpp
, boost
, brotli
, c-ares
, cmake
, crc32c
, curl
, flatbuffers
, gflags
, glog
, google-cloud-cpp
, grpc
, gtest
, libbacktrace
, lz4
, minio
, ninja
, nlohmann_json
, openssl
, perl
, protobuf
, python3
, rapidjson
, re2
, snappy
, sqlite
, thrift
, tzdata
, utf8proc
, which
, zlib
, zstd
, enableShared ? !stdenv.hostPlatform.isStatic
, enableFlight ? true
, enableJemalloc ? !stdenv.isDarwin
  # On Darwin, boost/process is broken in Boost 1.69; it was fixed in 1.70 and
  # does not exist in earlier releases.
  # See https://github.com/boostorg/process/issues/55
, enableS3 ? (!stdenv.isDarwin) || (lib.versionOlder boost.version "1.69" || lib.versionAtLeast boost.version "1.70")
, enableGcs ? !stdenv.isDarwin # google-cloud-cpp is not supported on darwin
}:

let
  # Renders a boolean as the ON/OFF spelling used in the CMake flags below.
  onOff = enabled: if enabled then "ON" else "OFF";

  # True for every Boost release except the 1.69 series (see the note on
  # `enableS3` in the argument list).
  boostIsNot169 =
    lib.versionOlder boost.version "1.69" || lib.versionAtLeast boost.version "1.70";

  # Test corpora exported to the test suite through ARROW_TEST_DATA and
  # PARQUET_TEST_DATA.
  arrowTestingSrc = fetchFromGitHub {
    owner = "apache";
    repo = "arrow-testing";
    rev = "5bab2f264a23f5af68f69ea93d24ef1e8e77fc88";
    hash = "sha256-Pxx8ohUpXb5u1995IvXmxQMqWiDJ+7LAll/AjQP7ph8=";
  };

  parquetTestingSrc = fetchFromGitHub {
    owner = "apache";
    repo = "parquet-testing";
    rev = "aafd3fc9df431c2625a514fb46626e5614f1d199";
    hash = "sha256-cO5t/mgsbBhbSefx8EMGTyxmgTjhZ8mFujkFQ3p/JS0=";
  };

  # Sources for the third-party components Arrow builds itself. All versions
  # are taken from
  # https://github.com/apache/arrow/blob/apache-arrow-${version}/cpp/thirdparty/versions.txt

  # jemalloc is vendored because arrow builds it with a custom symbol prefix
  # (to avoid collisions with the default allocator) and custom build flags.
  jemallocSrc = fetchurl {
    url = "https://github.com/jemalloc/jemalloc/releases/download/5.3.0/jemalloc-5.3.0.tar.bz2";
    hash = "sha256-LbgtHnEZ3z5xt2QCGbbf6EeJvAU3mDw7esT3GJrs/qo=";
  };

  # mimalloc is vendored because arrow builds it with custom build flags.
  mimallocSrc = fetchFromGitHub {
    owner = "microsoft";
    repo = "mimalloc";
    rev = "v2.0.6";
    hash = "sha256-u2ITXABBN/dwU+mCIbL3tN1f4c17aBuSdNTV+Adtohc=";
  };

  xsimdSrc = fetchFromGitHub {
    owner = "xtensor-stack";
    repo = "xsimd";
    rev = "8.1.0";
    hash = "sha256-Aqs6XJkGjAjGAp0PprabSM4m+32M/UXpSHppCHdzaZk=";
  };

  substraitSrc = fetchFromGitHub {
    owner = "substrait-io";
    repo = "substrait";
    rev = "v0.6.0";
    hash = "sha256-hxCBomL4Qg9cHLRg9ZiO9k+JVOZXn6f4ikPtK+V9tno=";
  };

in
assert lib.asserts.assertMsg
  ((enableS3 && stdenv.isDarwin) -> boostIsNot169)
  "S3 on Darwin requires Boost != 1.69";

stdenv.mkDerivation rec {
  pname = "arrow-cpp";
  version = "9.0.0";

  src = fetchurl {
    url = "mirror://apache/arrow/arrow-${version}/apache-arrow-${version}.tar.gz";
    hash = "sha256-qaAz8KNJAomZj0WGgNGVec8HkRcXumWv3my4AHD3qbU=";
  };
  sourceRoot = "apache-arrow-${version}/cpp";

  # Pre-fetched sources for the vendored components. The jemalloc attribute is
  # only defined when jemalloc is enabled: an attribute whose dynamic name
  # evaluates to null is left out of the set.
  ${if enableJemalloc then "ARROW_JEMALLOC_URL" else null} = jemallocSrc;
  ARROW_MIMALLOC_URL = mimallocSrc;
  ARROW_XSIMD_URL = xsimdSrc;
  ARROW_SUBSTRAIT_URL = substraitSrc;

  patches = [
    # patch to fix python-test
    ./darwin.patch
  ];

  nativeBuildInputs = [
    cmake
    ninja
    autoconf # needed to build the vendored jemalloc
    flatbuffers
  ]
  ++ lib.optionals stdenv.isDarwin [ fixDarwinDylibNames ];

  buildInputs = [
    boost
    brotli
    flatbuffers
    gflags
    glog
    gtest
    libbacktrace
    lz4
    nlohmann_json # alternative JSON parser to rapidjson
    protobuf # required by substrait
    rapidjson
    re2
    snappy
    thrift
    utf8proc
    zlib
    zstd
  ]
  ++ lib.optionals enableShared [
    python3.pkgs.python
    python3.pkgs.numpy
  ]
  ++ lib.optionals enableFlight [
    grpc
    openssl
    protobuf
  ]
  ++ lib.optionals enableS3 [
    aws-sdk-cpp
    openssl
  ]
  ++ lib.optionals enableGcs [
    crc32c
    curl
    google-cloud-cpp
    grpc
    nlohmann_json
  ];

  # Replace the call to discover_tz_dir() in the vendored date library with
  # the fixed path of the tzdata package's zoneinfo directory.
  preConfigure = ''
    patchShebangs build-support/
    substituteInPlace "src/arrow/vendored/datetime/tz.cpp" \
      --replace 'discover_tz_dir();' '"${tzdata}/share/zoneinfo";'
  '';

  cmakeFlags = [
    "-DARROW_BUILD_SHARED=${onOff enableShared}"
    "-DARROW_BUILD_STATIC=${onOff (!enableShared)}"
    "-DARROW_BUILD_TESTS=ON"
    "-DARROW_BUILD_INTEGRATION=ON"
    "-DARROW_BUILD_UTILITIES=ON"
    "-DARROW_EXTRA_ERROR_CONTEXT=ON"
    "-DARROW_VERBOSE_THIRDPARTY_BUILD=ON"
    "-DARROW_DEPENDENCY_SOURCE=SYSTEM"
    "-Dxsimd_SOURCE=AUTO"
    "-DARROW_DEPENDENCY_USE_SHARED=${onOff enableShared}"
    "-DARROW_COMPUTE=ON"
    "-DARROW_CSV=ON"
    "-DARROW_DATASET=ON"
    "-DARROW_ENGINE=ON"
    "-DARROW_FILESYSTEM=ON"
    "-DARROW_FLIGHT_SQL=${onOff enableFlight}"
    "-DARROW_HDFS=ON"
    "-DARROW_IPC=ON"
    "-DARROW_JEMALLOC=${onOff enableJemalloc}"
    "-DARROW_JSON=ON"
    "-DARROW_PLASMA=ON"
    # Python is switched off for static builds because openblas is currently
    # broken there.
    "-DARROW_PYTHON=${onOff enableShared}"
    "-DARROW_USE_GLOG=ON"
    "-DARROW_WITH_BACKTRACE=ON"
    "-DARROW_WITH_BROTLI=ON"
    "-DARROW_WITH_LZ4=ON"
    "-DARROW_WITH_NLOHMANN_JSON=ON"
    "-DARROW_WITH_SNAPPY=ON"
    "-DARROW_WITH_UTF8PROC=ON"
    "-DARROW_WITH_ZLIB=ON"
    "-DARROW_WITH_ZSTD=ON"
    "-DARROW_MIMALLOC=ON"
    # Parquet options:
    "-DARROW_PARQUET=ON"
    "-DARROW_SUBSTRAIT=ON"
    "-DPARQUET_BUILD_EXECUTABLES=ON"
    "-DARROW_FLIGHT=${onOff enableFlight}"
    "-DARROW_FLIGHT_TESTING=${onOff enableFlight}"
    "-DARROW_S3=${onOff enableS3}"
    "-DARROW_GCS=${onOff enableGcs}"
  ]
  ++ lib.optionals (!enableShared) [
    "-DARROW_TEST_LINKAGE=static"
  ]
  ++ lib.optionals stdenv.isDarwin [
    "-DCMAKE_INSTALL_RPATH=@loader_path/../lib" # needed for tools executables
  ]
  ++ lib.optionals (!stdenv.isx86_64) [
    "-DARROW_USE_SIMD=OFF"
  ]
  ++ lib.optionals enableS3 [
    "-DAWSSDK_CORE_HEADER_FILE=${aws-sdk-cpp}/include/aws/core/Aws.h"
  ];

  doInstallCheck = true;

  # Test-suite environment. Each variable is the empty string when the install
  # check is disabled.
  ARROW_TEST_DATA = if doInstallCheck then "${arrowTestingSrc}/data" else "";
  PARQUET_TEST_DATA = if doInstallCheck then "${parquetTestingSrc}/data" else "";
  GTEST_FILTER =
    let
      # Upstream Issue: https://issues.apache.org/jira/browse/ARROW-11398
      aarch64Skips = lib.optionals stdenv.hostPlatform.isAarch64 [
        "TestFilterKernelWithNumeric/3.CompareArrayAndFilterRandomNumeric"
        "TestFilterKernelWithNumeric/7.CompareArrayAndFilterRandomNumeric"
        "TestCompareKernel.PrimitiveRandomTests"
      ];
      s3Skips = lib.optionals enableS3 [
        "S3OptionsTest.FromUri"
        "S3RegionResolutionTest.NonExistentBucket"
        "S3RegionResolutionTest.PublicBucket"
        "S3RegionResolutionTest.RestrictedBucket"
        "TestMinioServer.Connect"
        "TestS3FS.*"
        "TestS3FSGeneric.*"
      ];
      skippedGtests = aarch64Skips ++ s3Skips;
    in
    # The leading "-" makes every listed pattern an exclusion.
    if doInstallCheck
    then "-${lib.concatStringsSep ":" skippedGtests}"
    else "";

  __darwinAllowLocalNetworking = true;

  installCheckInputs = [ perl which sqlite ]
    ++ lib.optionals enableS3 [ minio ];

  installCheckPhase =
    let
      # Some plasma tests need to be patched to use a shorter AF_UNIX socket
      # path on Darwin. See https://github.com/NixOS/nix/pull/1085
      darwinSkips = lib.optionals stdenv.isDarwin [
        "plasma-external-store-tests"
        "plasma-client-tests"
      ];
      skippedCtests = darwinSkips ++ [ "arrow-gcsfs-test" ];
      # Anchored alternation, so only exact test names are excluded.
      skipRegex = "^(${lib.concatStringsSep "|" skippedCtests})$";
    in
    ''
      runHook preInstallCheck

      ctest -L unittest \
        --exclude-regex '${skipRegex}'

      runHook postInstallCheck
    '';

  meta = {
    description = "A cross-language development platform for in-memory data";
    homepage = "https://arrow.apache.org/docs/cpp/";
    license = lib.licenses.asl20;
    platforms = lib.platforms.unix;
    maintainers = with lib.maintainers; [ tobim veprbl cpcloud ];
  };

  # Expose the feature toggles this build was evaluated with.
  passthru = {
    inherit enableFlight enableJemalloc enableS3 enableGcs;
  };
}