# Python bindings for Apache Arrow (pyarrow).
#
# Version and source are taken from the arrow-cpp package passed in by the
# caller; the Python sources live in the `python/` subdirectory of that tree.
# Optional pyarrow components are switched on or off to match how arrow-cpp
# itself was configured (see the PYARROW_WITH_* variables below).
{ lib
, stdenv
, buildPythonPackage
, python
, pythonOlder
, arrow-cpp
, cffi
, cloudpickle
, cmake
, cython
, fsspec
, hypothesis
, numpy
, pandas
, pytestCheckHook
, pytest-lazy-fixture
, pkg-config
, scipy
, setuptools-scm
, six
}:

let
  # Map a boolean to 1 or 0, the form given to the PYARROW_WITH_* variables.
  zero_or_one = cond: if cond then 1 else 0;

  # arrow-cpp with its `python3` argument replaced by the interpreter this
  # package is being built for.
  _arrow-cpp = arrow-cpp.override { python3 = python; };
in

buildPythonPackage rec {
  pname = "pyarrow";
  inherit (_arrow-cpp) version src;

  disabled = pythonOlder "3.7";

  sourceRoot = "apache-arrow-${version}/python";

  nativeBuildInputs = [
    cmake
    cython
    pkg-config
    setuptools-scm
  ];

  propagatedBuildInputs = [
    cffi
    cloudpickle
    fsspec
    numpy
    scipy
    six
  ];

  checkInputs = [
    hypothesis
    pandas
    pytestCheckHook
    pytest-lazy-fixture
  ];

  PYARROW_BUILD_TYPE = "release";

  # Optional components. Flight and S3 follow the arrow-cpp configuration;
  # Plasma is turned off on Darwin.
  PYARROW_WITH_DATASET = zero_or_one true;
  PYARROW_WITH_FLIGHT = zero_or_one _arrow-cpp.enableFlight;
  PYARROW_WITH_HDFS = zero_or_one true;
  PYARROW_WITH_PARQUET = zero_or_one true;
  PYARROW_WITH_PLASMA = zero_or_one (!stdenv.isDarwin);
  PYARROW_WITH_S3 = zero_or_one _arrow-cpp.enableS3;

  PYARROW_CMAKE_OPTIONS = [
    "-DCMAKE_INSTALL_RPATH=${ARROW_HOME}/lib"
  ];

  ARROW_HOME = _arrow-cpp;
  PARQUET_HOME = _arrow-cpp;

  # Only set when the test suite runs; an empty string otherwise.
  ARROW_TEST_DATA = lib.optionalString doCheck _arrow-cpp.ARROW_TEST_DATA;

  doCheck = true;

  # cmake is a build input, but its configure hook is disabled.
  # NOTE(review): presumably pyarrow's own build drives CMake (see
  # PYARROW_CMAKE_OPTIONS above) - confirm.
  dontUseCmakeConfigure = true;

  __darwinAllowLocalNetworking = true;

  preBuild = ''
    export PYARROW_PARALLEL=$NIX_BUILD_CORES
  '';

  pytestFlagsArray = [
    # Deselect a single test because pyarrow prints a 2-line error message where
    # only a single line is expected. The additional line of output comes from
    # the glog library which is an optional dependency of arrow-cpp that is
    # enabled in nixpkgs.
    # Upstream Issue: https://issues.apache.org/jira/browse/ARROW-11393
    "--deselect=pyarrow/tests/test_memory.py::test_env_var"
    # these tests require access to s3 via the internet
    "--deselect=pyarrow/tests/test_fs.py::test_resolve_s3_region"
    "--deselect=pyarrow/tests/test_fs.py::test_s3_real_aws"
    "--deselect=pyarrow/tests/test_fs.py::test_s3_real_aws_region_selection"
    "--deselect=pyarrow/tests/test_fs.py::test_s3_options"
    # Flaky tests
    "--deselect=pyarrow/tests/test_flight.py::test_roundtrip_errors"
    "--deselect=pyarrow/tests/test_pandas.py::test_threaded_pandas_import"
  ] ++ lib.optionals stdenv.isDarwin [
    # Requires loopback networking
    "--deselect=pyarrow/tests/test_ipc.py::test_socket_"
    "--deselect=pyarrow/tests/test_flight.py::test_never_sends_data"
    "--deselect=pyarrow/tests/test_flight.py::test_large_descriptor"
    "--deselect=pyarrow/tests/test_flight.py::test_large_metadata_client"
    "--deselect=pyarrow/tests/test_flight.py::test_none_action_side_effect"
  ];

  dontUseSetuptoolsCheck = true;

  # Before testing, remove everything under pyarrow/ except conftest.py and
  # tests/, move conftest.py into tests/ and rewrite the relative import that
  # referenced it.
  # NOTE(review): presumably this makes the tests run against the installed
  # package instead of the source tree - confirm.
  preCheck = ''
    shopt -s extglob
    rm -r pyarrow/!(conftest.py|tests)
    mv pyarrow/conftest.py pyarrow/tests/parent_conftest.py
    substituteInPlace pyarrow/tests/conftest.py --replace ..conftest .parent_conftest
  '' + lib.optionalString stdenv.isDarwin ''
    # OSError: [Errno 24] Too many open files
    ulimit -n 1024
  '';

  # Optional modules are only import-checked when the matching
  # PYARROW_WITH_* switch above enables them; otherwise the check would fail
  # on a module that was deliberately not built.
  pythonImportsCheck = [
    "pyarrow"
  ] ++ map (module: "pyarrow.${module}") ([
    "compute"
    "csv"
    "dataset"
    "feather"
  ] ++ lib.optionals _arrow-cpp.enableFlight [
    "flight"
  ] ++ [
    "fs"
    "hdfs"
    "json"
    "parquet"
  ] ++ lib.optionals (!stdenv.isDarwin) [
    "plasma"
  ]);

  meta = with lib; {
    description = "A cross-language development platform for in-memory data";
    homepage = "https://arrow.apache.org/";
    license = licenses.asl20;
    platforms = platforms.unix;
    maintainers = with maintainers; [ veprbl cpcloud ];
  };
}