nixpkgs mirror (for testing) github.com/NixOS/nixpkgs
nix
at python-updates 189 lines 5.5 kB view raw
# Builds the `pyarrow` Python package from the `python/` subdirectory of the
# arrow-cpp source tree, linking against the already-built `arrow-cpp` package.
#
# Version and source are inherited from `arrow-cpp`, so the two always match.
# Optional components (Flight, S3, GCS) follow the flags `arrow-cpp` was built
# with.
{
  lib,
  stdenv,
  buildPythonPackage,
  python,
  pythonAtLeast,
  arrow-cpp,
  cffi,
  cloudpickle,
  cmake,
  cython,
  fsspec,
  hypothesis,
  numpy,
  pandas,
  pytestCheckHook,
  pytest-lazy-fixture,
  pkg-config,
  setuptools,
  setuptools-scm,
  oldest-supported-numpy,
}:

let
  # Map a boolean to the integer 1 (true) or 0 (false); used for the
  # PYARROW_* toggles below.
  zero_or_one = cond: if cond then 1 else 0;
in

buildPythonPackage rec {
  pname = "pyarrow";
  inherit (arrow-cpp) version src;
  pyproject = true;

  # The Python package lives in the `python/` subdirectory of the source.
  sourceRoot = "${src.name}/python";

  nativeBuildInputs = [
    cmake
    cython
    pkg-config
    setuptools
    setuptools-scm
    oldest-supported-numpy
  ];

  buildInputs = [ arrow-cpp ];

  propagatedBuildInputs = [
    cffi
    numpy
  ];

  checkInputs = [
    cloudpickle
    fsspec
  ];

  nativeCheckInputs = [
    hypothesis
    pandas
    pytestCheckHook
    pytest-lazy-fixture
  ];

  PYARROW_BUILD_TYPE = "release";

  # Component toggles (1 = on, 0 = off). Flight, S3 and GCS mirror the
  # corresponding `arrow-cpp` build flags; the others are fixed.
  PYARROW_WITH_DATASET = zero_or_one true;
  PYARROW_WITH_FLIGHT = zero_or_one arrow-cpp.enableFlight;
  PYARROW_WITH_HDFS = zero_or_one true;
  PYARROW_WITH_PARQUET = zero_or_one true;
  PYARROW_WITH_PARQUET_ENCRYPTION = zero_or_one true;
  PYARROW_WITH_S3 = zero_or_one arrow-cpp.enableS3;
  PYARROW_WITH_GCS = zero_or_one arrow-cpp.enableGcs;
  PYARROW_BUNDLE_ARROW_CPP_HEADERS = zero_or_one false;

  # Point the install RPATH at the arrow-cpp library directory.
  PYARROW_CMAKE_OPTIONS = [ "-DCMAKE_INSTALL_RPATH=${ARROW_HOME}/lib" ];

  ARROW_HOME = arrow-cpp;
  PARQUET_HOME = arrow-cpp;

  # Only reference the test data when the check phase is enabled.
  ARROW_TEST_DATA = lib.optionalString doCheck arrow-cpp.ARROW_TEST_DATA;
  doCheck = true;

  # cmake is listed in nativeBuildInputs as a tool only; its configure hook
  # is switched off here.
  dontUseCmakeConfigure = true;

  __darwinAllowLocalNetworking = true;

  preBuild = ''
    export PYARROW_PARALLEL=$NIX_BUILD_CORES
  '';

  postInstall = ''
    # copy the pyarrow C++ header files to the appropriate location
    pyarrow_include="$out/${python.sitePackages}/pyarrow/include"
    mkdir -p "$pyarrow_include/arrow/python"
    find "$PWD/pyarrow/src/arrow" -type f -name '*.h' -exec cp {} "$pyarrow_include/arrow/python" \;
  '';

  disabledTestPaths = [
    # These tests require access to s3 via the internet.
    "pyarrow/tests/test_fs.py::test_resolve_s3_region"
    "pyarrow/tests/test_fs.py::test_s3_finalize"
    "pyarrow/tests/test_fs.py::test_s3_finalize_region_resolver"
    "pyarrow/tests/test_fs.py::test_s3_real_aws"
    "pyarrow/tests/test_fs.py::test_s3_real_aws_region_selection"
    "pyarrow/tests/test_fs.py::test_s3_options"
    # Flaky test
    "pyarrow/tests/test_flight.py::test_roundtrip_errors"
    "pyarrow/tests/test_pandas.py::test_threaded_pandas_import"
    # Flaky test, works locally but not on Hydra.
    "pyarrow/tests/test_csv.py::TestThreadedCSVTableRead::test_cancellation"
    # expects arrow-cpp headers to be bundled.
    "pyarrow/tests/test_cpp_internals.py::test_pyarrow_include"
    # Searches for TZDATA in /usr.
    "pyarrow/tests/test_orc.py::test_example_using_json"
    # AssertionError: assert 'Europe/Monaco' == 'Europe/Paris'
    "pyarrow/tests/test_types.py::test_dateutil_tzinfo_to_string"
    # These fail with xxx_fixture not found.
    # xxx = unary_func, unary_agg_func, varargs_agg_func
    "pyarrow/tests/test_substrait.py::test_udf_via_substrait"
    "pyarrow/tests/test_substrait.py::test_scalar_aggregate_udf_basic"
    "pyarrow/tests/test_substrait.py::test_hash_aggregate_udf_basic"
    "pyarrow/tests/test_udf.py::test_hash_agg_basic"
    "pyarrow/tests/test_udf.py::test_hash_agg_empty"
    "pyarrow/tests/test_udf.py::test_input_lifetime"
    "pyarrow/tests/test_udf.py::test_scalar_agg_basic"
    "pyarrow/tests/test_udf.py::test_scalar_agg_empty"
    "pyarrow/tests/test_udf.py::test_scalar_agg_varargs"
    "pyarrow/tests/test_udf.py::test_scalar_input"
    "pyarrow/tests/test_udf.py::test_scalar_udf_context"
    "pyarrow/tests/test_udf.py::test_udf_array_unary"
  ]
  ++ lib.optionals stdenv.hostPlatform.isDarwin [
    # Requires loopback networking.
    "pyarrow/tests/test_ipc.py::test_socket_"
    "pyarrow/tests/test_flight.py::test_never_sends_data"
    "pyarrow/tests/test_flight.py::test_large_descriptor"
    "pyarrow/tests/test_flight.py::test_large_metadata_client"
    "pyarrow/tests/test_flight.py::test_none_action_side_effect"
    # Fails to compile.
    "pyarrow/tests/test_cython.py::test_cython_api"
  ]
  ++ lib.optionals (pythonAtLeast "3.11") [
    # Repr output is printing number instead of enum name so these tests fail
    "pyarrow/tests/test_fs.py::test_get_file_info"
  ]
  ++ lib.optionals stdenv.hostPlatform.isLinux [
    # This test requires local networking.
    "pyarrow/tests/test_fs.py::test_filesystem_from_uri_gcs"
  ];

  disabledTests = [ "GcsFileSystem" ];

  # Before running the tests: remove everything under pyarrow/ except
  # conftest.py and tests/, move the package-level conftest next to the tests,
  # and rewrite the relative import that referred to it. Presumably this makes
  # the tests exercise the installed package rather than the source tree.
  preCheck = ''
    export PARQUET_TEST_DATA="${arrow-cpp.PARQUET_TEST_DATA}"
    shopt -s extglob
    rm -r pyarrow/!(conftest.py|tests)
    mv pyarrow/conftest.py pyarrow/tests/parent_conftest.py
    substituteInPlace pyarrow/tests/conftest.py --replace-fail ..conftest .parent_conftest
  ''
  + lib.optionalString stdenv.hostPlatform.isDarwin ''
    # OSError: [Errno 24] Too many open files
    ulimit -n 1024
  '';

  pythonImportsCheck = [
    "pyarrow"
  ]
  ++ map (module: "pyarrow.${module}") (
    [
      "compute"
      "csv"
      "dataset"
      "feather"
    ]
    # pyarrow.flight is only built when PYARROW_WITH_FLIGHT is 1, i.e. when
    # arrow-cpp itself was built with Flight; only check the import then.
    ++ lib.optionals arrow-cpp.enableFlight [ "flight" ]
    ++ [
      "fs"
      "json"
      "orc"
      "parquet"
    ]
  );

  meta = {
    description = "Cross-language development platform for in-memory data";
    homepage = "https://arrow.apache.org/";
    license = lib.licenses.asl20;
    platforms = lib.platforms.unix;
    maintainers = with lib.maintainers; [
      veprbl
      cpcloud
    ];
  };
}