# NOTE(review): recovered from a web-view copy labelled "at 23.05-pre, 3.7 kB";
# the viewer's embedded line numbers were stripped and line breaks restored.
#
# Python bindings for Apache Arrow, built from the `python/` subdirectory of
# the same source tree (and at the same version) as the `arrow-cpp` package.
{ lib
, stdenv
, buildPythonPackage
, python
, pythonOlder
, arrow-cpp
, cffi
, cloudpickle
, cmake
, cython
, fsspec
, hypothesis
, numpy
, pandas
, pytestCheckHook
, pytest-lazy-fixture
, pkg-config
, scipy
, setuptools-scm
, six
}:

let
  # Map a boolean onto the integers 1 (true) / 0 (false); used for the
  # PYARROW_WITH_* switches below.
  zero_or_one = cond: if cond then 1 else 0;

  # arrow-cpp with its `python3` argument replaced by the interpreter of the
  # Python package set this derivation is being built for.
  _arrow-cpp = arrow-cpp.override { python3 = python; };
in

buildPythonPackage rec {
  pname = "pyarrow";
  # Version and source are taken from arrow-cpp so the two cannot drift apart.
  inherit (_arrow-cpp) version src;

  disabled = pythonOlder "3.7";

  # Only the Python bindings inside the Arrow source tree are built here.
  sourceRoot = "apache-arrow-${version}/python";

  nativeBuildInputs = [
    cmake
    cython
    pkg-config
    setuptools-scm
  ];

  propagatedBuildInputs = [
    cffi
    cloudpickle
    fsspec
    numpy
    scipy
    six
  ];

  checkInputs = [
    hypothesis
    pandas
    pytestCheckHook
    pytest-lazy-fixture
  ];

  PYARROW_BUILD_TYPE = "release";

  # Optional-component switches (1 = on, 0 = off); presumably read by
  # pyarrow's build scripts. Flight and S3 follow whatever arrow-cpp was
  # built with; Plasma is switched off on Darwin.
  PYARROW_WITH_DATASET = zero_or_one true;
  PYARROW_WITH_FLIGHT = zero_or_one _arrow-cpp.enableFlight;
  PYARROW_WITH_HDFS = zero_or_one true;
  PYARROW_WITH_PARQUET = zero_or_one true;
  PYARROW_WITH_PLASMA = zero_or_one (!stdenv.isDarwin);
  PYARROW_WITH_S3 = zero_or_one _arrow-cpp.enableS3;

  # Point the install RPATH at the lib directory of the arrow-cpp used above.
  PYARROW_CMAKE_OPTIONS = [
    "-DCMAKE_INSTALL_RPATH=${ARROW_HOME}/lib"
  ];

  ARROW_HOME = _arrow-cpp;
  PARQUET_HOME = _arrow-cpp;

  # Empty string when the test suite is disabled.
  ARROW_TEST_DATA = lib.optionalString doCheck _arrow-cpp.ARROW_TEST_DATA;

  doCheck = true;

  # cmake is a build input, but its configure hook is switched off here;
  # presumably the Python build invokes CMake itself -- TODO confirm.
  dontUseCmakeConfigure = true;

  __darwinAllowLocalNetworking = true;

  preBuild = ''
    export PYARROW_PARALLEL=$NIX_BUILD_CORES
  '';

  pytestFlagsArray = [
    # Deselect a single test because pyarrow prints a 2-line error message where
    # only a single line is expected. The additional line of output comes from
    # the glog library which is an optional dependency of arrow-cpp that is
    # enabled in nixpkgs.
    # Upstream Issue: https://issues.apache.org/jira/browse/ARROW-11393
    "--deselect=pyarrow/tests/test_memory.py::test_env_var"
    # these tests require access to s3 via the internet
    "--deselect=pyarrow/tests/test_fs.py::test_resolve_s3_region"
    "--deselect=pyarrow/tests/test_fs.py::test_s3_real_aws"
    "--deselect=pyarrow/tests/test_fs.py::test_s3_real_aws_region_selection"
    "--deselect=pyarrow/tests/test_fs.py::test_s3_options"
    # Flaky tests
    "--deselect=pyarrow/tests/test_flight.py::test_roundtrip_errors"
    "--deselect=pyarrow/tests/test_pandas.py::test_threaded_pandas_import"
  ] ++ lib.optionals stdenv.isDarwin [
    # Requires loopback networking
    "--deselect=pyarrow/tests/test_ipc.py::test_socket_"
    "--deselect=pyarrow/tests/test_flight.py::test_never_sends_data"
    "--deselect=pyarrow/tests/test_flight.py::test_large_descriptor"
    "--deselect=pyarrow/tests/test_flight.py::test_large_metadata_client"
    "--deselect=pyarrow/tests/test_flight.py::test_none_action_side_effect"
  ];

  dontUseSetuptoolsCheck = true;

  # Before testing, delete everything under pyarrow/ except conftest.py and
  # tests/, move conftest.py into tests/ as parent_conftest.py, and rewrite
  # the relative import in tests/conftest.py to match. Presumably this keeps
  # the tests from importing the unbuilt in-tree package -- TODO confirm.
  preCheck = ''
    shopt -s extglob
    rm -r pyarrow/!(conftest.py|tests)
    mv pyarrow/conftest.py pyarrow/tests/parent_conftest.py
    substituteInPlace pyarrow/tests/conftest.py --replace ..conftest .parent_conftest
  '' + lib.optionalString stdenv.isDarwin ''
    # OSError: [Errno 24] Too many open files
    ulimit -n 1024
  '';

  # Import the top-level package plus each listed submodule; `plasma` is only
  # checked where it is built (see PYARROW_WITH_PLASMA).
  pythonImportsCheck = [
    "pyarrow"
  ] ++ map (module: "pyarrow.${module}") ([
    "compute"
    "csv"
    "dataset"
    "feather"
    "flight"
    "fs"
    "hdfs"
    "json"
    "parquet"
  ] ++ lib.optionals (!stdenv.isDarwin) [
    "plasma"
  ]);

  meta = with lib; {
    description = "A cross-language development platform for in-memory data";
    homepage = "https://arrow.apache.org/";
    license = licenses.asl20;
    platforms = platforms.unix;
    maintainers = with maintainers; [ veprbl cpcloud ];
  };
}