nixpkgs mirror (for testing)
github.com/NixOS/nixpkgs
nix
# pyarrow: Python package built from the `python/` subdirectory of the
# arrow-cpp source tree, against an arrow-cpp overridden to use this
# package set's Python interpreter.
{ lib
, stdenv
, buildPythonPackage
, python
, isPy3k
, arrow-cpp
, cffi
, cloudpickle
, cmake
, cython
, fsspec
, hypothesis
, numpy
, pandas
, pytestCheckHook
, pytest-lazy-fixture
, pkg-config
, scipy
, setuptools-scm
, six
}:

let
  # Render a boolean as the integer 1 or 0 for the PYARROW_WITH_* settings.
  zero_or_one = cond: if cond then 1 else 0;

  # arrow-cpp with its `python3` argument replaced by this package set's
  # interpreter. Version, source, feature flags and test data are all taken
  # from this derivation so the two packages cannot drift apart.
  _arrow-cpp = arrow-cpp.override { python3 = python; };
in

buildPythonPackage rec {
  pname = "pyarrow";
  disabled = !isPy3k;

  inherit (_arrow-cpp) version src;

  # The Python bindings live in a subdirectory of the unpacked Arrow sources.
  sourceRoot = "apache-arrow-${version}/python";

  nativeBuildInputs = [ cmake cython pkg-config setuptools-scm ];
  propagatedBuildInputs = [ numpy six cloudpickle scipy fsspec cffi ];
  checkInputs = [
    hypothesis
    pandas
    pytestCheckHook
    pytest-lazy-fixture
  ];

  PYARROW_BUILD_TYPE = "release";

  # Optional components, exported to the build as 1/0. FLIGHT and S3 follow
  # whatever the arrow-cpp derivation was built with; PLASMA is off on Darwin.
  PYARROW_WITH_DATASET = zero_or_one true;
  PYARROW_WITH_FLIGHT = zero_or_one _arrow-cpp.enableFlight;
  PYARROW_WITH_HDFS = zero_or_one true;
  PYARROW_WITH_PARQUET = zero_or_one true;
  PYARROW_WITH_PLASMA = zero_or_one (!stdenv.isDarwin);
  PYARROW_WITH_S3 = zero_or_one _arrow-cpp.enableS3;

  PYARROW_CMAKE_OPTIONS = [
    "-DCMAKE_INSTALL_RPATH=${ARROW_HOME}/lib"
  ];

  ARROW_HOME = _arrow-cpp;
  PARQUET_HOME = _arrow-cpp;

  # Only point at the Arrow test data when the test suite is actually run.
  ARROW_TEST_DATA = lib.optionalString doCheck _arrow-cpp.ARROW_TEST_DATA;

  doCheck = true;
  # cmake is listed in nativeBuildInputs as a tool only; its configure phase
  # must not run for this package.
  dontUseCmakeConfigure = true;

  preBuild = ''
    export PYARROW_PARALLEL=$NIX_BUILD_CORES
  '';

  pytestFlagsArray = [
    # Deselect a single test because pyarrow prints a 2-line error message where
    # only a single line is expected. The additional line of output comes from
    # the glog library which is an optional dependency of arrow-cpp that is
    # enabled in nixpkgs.
    # Upstream Issue: https://issues.apache.org/jira/browse/ARROW-11393
    "--deselect=pyarrow/tests/test_memory.py::test_env_var"
    # these tests require access to s3 via the internet
    "--deselect=pyarrow/tests/test_fs.py::test_resolve_s3_region"
    "--deselect=pyarrow/tests/test_fs.py::test_s3_real_aws"
    "--deselect=pyarrow/tests/test_fs.py::test_s3_real_aws_region_selection"
    "--deselect=pyarrow/tests/test_fs.py::test_s3_options"
  ] ++ lib.optionals stdenv.isDarwin [
    # Requires loopback networking
    "--deselect=pyarrow/tests/test_ipc.py::test_socket_"
  ];

  # Tests are run through pytestCheckHook (see checkInputs) instead.
  dontUseSetuptoolsCheck = true;
  # Delete everything under pyarrow/ except the tests directory before running
  # the test suite.
  # NOTE(review): presumably so the tests import the installed package rather
  # than the unbuilt source tree -- confirm.
  preCheck = ''
    shopt -s extglob
    rm -r pyarrow/!(tests)
  '';

  # Import-check every submodule that was actually built. Optional modules are
  # gated on the same conditions as their PYARROW_WITH_* setting above, so that
  # building against an arrow-cpp without Flight does not fail on
  # `import pyarrow.flight`.
  pythonImportsCheck = [ "pyarrow" ] ++ map (module: "pyarrow.${module}") ([
    "compute"
    "csv"
    "dataset"
    "feather"
    "fs"
    "hdfs"
    "json"
    "parquet"
  ] ++ lib.optionals _arrow-cpp.enableFlight [ "flight" ]
    ++ lib.optionals (!stdenv.isDarwin) [ "plasma" ]);

  meta = with lib; {
    broken = stdenv.isDarwin;
    description = "A cross-language development platform for in-memory data";
    homepage = "https://arrow.apache.org/";
    license = licenses.asl20;
    platforms = platforms.unix;
    maintainers = with maintainers; [ veprbl cpcloud ];
  };
}