Clone of https://github.com/NixOS/nixpkgs.git (to stress-test knotserver)
# Python bindings for Apache Arrow (pyarrow).
#
# The package reuses the version and source of `arrow-cpp`, builds from the
# `python/` subdirectory of that source tree, and links against the `arrow-cpp`
# package passed in as an argument. Optional components (Flight, S3, GCS) are
# switched on or off to match the flags exposed by `arrow-cpp`.
{
  lib,
  stdenv,
  buildPythonPackage,
  python,
  pythonAtLeast,
  pythonOlder,
  arrow-cpp,
  cffi,
  cloudpickle,
  cmake,
  cython_0,
  fsspec,
  hypothesis,
  numpy,
  pandas,
  pytestCheckHook,
  pytest-lazy-fixture,
  pkg-config,
  setuptools,
  setuptools-scm,
  oldest-supported-numpy,
}:

let
  # Turn a boolean into the 1/0 value used by the PYARROW_* environment toggles.
  zero_or_one = cond: if cond then 1 else 0;
in

# `rec` is required: PYARROW_CMAKE_OPTIONS refers to ARROW_HOME and
# ARROW_TEST_DATA refers to doCheck.
buildPythonPackage rec {
  pname = "pyarrow";
  inherit (arrow-cpp) version src;
  pyproject = true;

  disabled = pythonOlder "3.7";

  sourceRoot = "source/python";

  nativeBuildInputs = [
    cmake
    cython_0
    pkg-config
    setuptools
    setuptools-scm
    oldest-supported-numpy
  ];

  buildInputs = [ arrow-cpp ];

  propagatedBuildInputs = [
    cffi
    numpy
  ];

  checkInputs = [
    cloudpickle
    fsspec
  ];

  nativeCheckInputs = [
    hypothesis
    pandas
    pytestCheckHook
    pytest-lazy-fixture
  ];

  PYARROW_BUILD_TYPE = "release";

  # Component toggles. Flight, S3 and GCS follow how arrow-cpp was configured;
  # the remaining components are set unconditionally.
  PYARROW_WITH_DATASET = zero_or_one true;
  PYARROW_WITH_FLIGHT = zero_or_one arrow-cpp.enableFlight;
  PYARROW_WITH_HDFS = zero_or_one true;
  PYARROW_WITH_PARQUET = zero_or_one true;
  PYARROW_WITH_PARQUET_ENCRYPTION = zero_or_one true;
  PYARROW_WITH_S3 = zero_or_one arrow-cpp.enableS3;
  PYARROW_WITH_GCS = zero_or_one arrow-cpp.enableGcs;
  PYARROW_BUNDLE_ARROW_CPP_HEADERS = zero_or_one false;

  # Point the extension modules' RPATH at the arrow-cpp libraries.
  PYARROW_CMAKE_OPTIONS = [ "-DCMAKE_INSTALL_RPATH=${ARROW_HOME}/lib" ];

  ARROW_HOME = arrow-cpp;
  PARQUET_HOME = arrow-cpp;

  # Only expose the test data location when the check phase is enabled.
  ARROW_TEST_DATA = lib.optionalString doCheck arrow-cpp.ARROW_TEST_DATA;

  doCheck = true;

  # cmake is a build input, but its configure hook is disabled here.
  # NOTE(review): presumably the Python build invokes cmake itself - confirm.
  dontUseCmakeConfigure = true;

  __darwinAllowLocalNetworking = true;

  preBuild = ''
    export PYARROW_PARALLEL=$NIX_BUILD_CORES
  '';

  postInstall = ''
    # copy the pyarrow C++ header files to the appropriate location
    pyarrow_include="$out/${python.sitePackages}/pyarrow/include"
    mkdir -p "$pyarrow_include/arrow/python"
    find "$PWD/pyarrow/src/arrow" -type f -name '*.h' -exec cp {} "$pyarrow_include/arrow/python" \;
  '';

  pytestFlagsArray =
    [
      # A couple of tests are missing fixture imports, luckily pytest offers a
      # clean solution.
      # NOTE(review): pytest documents `--fixtures` as "show available
      # fixtures" - confirm that the test suite is actually executed with this
      # flag present.
      "--fixtures pyarrow/tests/conftest.py"
      # Deselect a single test because pyarrow prints a 2-line error message where
      # only a single line is expected. The additional line of output comes from
      # the glog library which is an optional dependency of arrow-cpp that is
      # enabled in nixpkgs.
      # Upstream Issue: https://issues.apache.org/jira/browse/ARROW-11393
      "--deselect=pyarrow/tests/test_memory.py::test_env_var"
      # these tests require access to s3 via the internet
      "--deselect=pyarrow/tests/test_fs.py::test_resolve_s3_region"
      "--deselect=pyarrow/tests/test_fs.py::test_s3_real_aws"
      "--deselect=pyarrow/tests/test_fs.py::test_s3_real_aws_region_selection"
      "--deselect=pyarrow/tests/test_fs.py::test_s3_options"
      # Flaky test
      "--deselect=pyarrow/tests/test_flight.py::test_roundtrip_errors"
      "--deselect=pyarrow/tests/test_pandas.py::test_threaded_pandas_import"
      # Flaky test, works locally but not on Hydra
      "--deselect=pyarrow/tests/test_csv.py::TestThreadedCSVTableRead::test_cancellation"
      # expects arrow-cpp headers to be bundled
      "--deselect=pyarrow/tests/test_cpp_internals.py::test_pyarrow_include"
    ]
    ++ lib.optionals stdenv.hostPlatform.isDarwin [
      # Requires loopback networking
      "--deselect=pyarrow/tests/test_ipc.py::test_socket_"
      "--deselect=pyarrow/tests/test_flight.py::test_never_sends_data"
      "--deselect=pyarrow/tests/test_flight.py::test_large_descriptor"
      "--deselect=pyarrow/tests/test_flight.py::test_large_metadata_client"
      "--deselect=pyarrow/tests/test_flight.py::test_none_action_side_effect"
      # fails to compile
      "--deselect=pyarrow/tests/test_cython.py::test_cython_api"
    ]
    ++ lib.optionals (pythonAtLeast "3.11") [
      # Repr output is printing number instead of enum name so these tests fail
      "--deselect=pyarrow/tests/test_fs.py::test_get_file_info"
    ]
    ++ lib.optionals stdenv.hostPlatform.isLinux [
      # this test requires local networking
      "--deselect=pyarrow/tests/test_fs.py::test_filesystem_from_uri_gcs"
    ];

  disabledTests = [ "GcsFileSystem" ];

  dontUseSetuptoolsCheck = true;

  # Before testing, delete everything under pyarrow/ except the tests and the
  # top-level conftest.py, then move that conftest next to the tests and patch
  # the relative import that referred to it.
  preCheck =
    ''
      shopt -s extglob
      rm -r pyarrow/!(conftest.py|tests)
      mv pyarrow/conftest.py pyarrow/tests/parent_conftest.py
      substituteInPlace pyarrow/tests/conftest.py --replace ..conftest .parent_conftest
    ''
    + lib.optionalString stdenv.hostPlatform.isDarwin ''
      # OSError: [Errno 24] Too many open files
      ulimit -n 1024
    '';

  pythonImportsCheck =
    [ "pyarrow" ]
    ++ map (module: "pyarrow.${module}") (
      [
        "compute"
        "csv"
        "dataset"
        "feather"
        "fs"
        "json"
        "parquet"
      ]
      # Only check pyarrow.flight when Flight support is enabled, mirroring
      # PYARROW_WITH_FLIGHT above.
      ++ lib.optionals arrow-cpp.enableFlight [ "flight" ]
    );

  meta = {
    description = "Cross-language development platform for in-memory data";
    homepage = "https://arrow.apache.org/";
    license = lib.licenses.asl20;
    platforms = lib.platforms.unix;
    maintainers = with lib.maintainers; [
      veprbl
      cpcloud
    ];
  };
}