# pandas: Python data-analysis library, built from the upstream GitHub tag.
#
# The test suite is switched off in the main derivation (`doCheck = false`)
# and exposed separately as `passthru.tests.pytest`, which re-enables it via
# `overridePythonAttrs`.
{
  lib,
  stdenv,
  buildPythonPackage,
  fetchFromGitHub,
  # NOTE(review): pythonAtLeast is not referenced below; kept so that existing
  # `override` callers passing it keep working.
  pythonAtLeast,
  pythonOlder,

  # build-system
  cython,
  meson-python,
  meson,
  oldest-supported-numpy,
  pkg-config,
  versioneer,
  wheel,

  # propagates
  numpy,
  python-dateutil,
  pytz,
  tzdata,

  # optionals
  beautifulsoup4,
  bottleneck,
  blosc2,
  fsspec,
  gcsfs,
  html5lib,
  jinja2,
  lxml,
  matplotlib,
  numba,
  numexpr,
  odfpy,
  openpyxl,
  psycopg2,
  pyarrow,
  pymysql,
  pyqt5,
  pyreadstat,
  qtpy,
  s3fs,
  scipy,
  sqlalchemy,
  tables,
  tabulate,
  xarray,
  xlrd,
  xlsxwriter,
  zstandard,

  # tests
  adv_cmds,
  glibc,
  glibcLocales,
  hypothesis,
  pytestCheckHook,
  pytest-xdist,
  pytest-asyncio,
  python,
  runtimeShell,
}:

let
  # Bound to a name so that `passthru.tests.pytest` can refer back to the
  # finished package and override it.
  pandas = buildPythonPackage rec {
    pname = "pandas";
    version = "2.2.1";
    pyproject = true;

    disabled = pythonOlder "3.9";

    src = fetchFromGitHub {
      owner = "pandas-dev";
      repo = "pandas";
      rev = "refs/tags/v${version}";
      hash = "sha256-eyVUIYG0KCAEJbh/qZiEjGpdXq7A+2Lab+5bp+7t4cw=";
    };

    # Relax the exact build-tool pins in pyproject.toml to lower bounds.
    # `--replace-fail` makes the build fail if a pin is no longer present,
    # so a stale substitution is noticed on version bumps.
    postPatch = ''
      substituteInPlace pyproject.toml \
        --replace-fail "Cython==3.0.5" "Cython>=3.0.5" \
        --replace-fail "meson-python==0.13.1" "meson-python>=0.13.1" \
        --replace-fail "meson==1.2.1" "meson>=1.2.1"
    '';

    nativeBuildInputs =
      [
        cython
        meson-python
        meson
        numpy
        pkg-config
        versioneer
        wheel
      ]
      ++ versioneer.optional-dependencies.toml
      ++ lib.optionals (pythonOlder "3.12") [ oldest-supported-numpy ];

    enableParallelBuilding = true;

    propagatedBuildInputs = [
      numpy
      python-dateutil
      pytz
      tzdata
    ];

    # One attribute per optional-dependency group, plus an `all` attribute
    # that concatenates every group.
    passthru.optional-dependencies =
      let
        extras = {
          aws = [ s3fs ];
          clipboard = [
            pyqt5
            qtpy
          ];
          compression = [ zstandard ];
          computation = [
            scipy
            xarray
          ];
          excel = [
            odfpy
            openpyxl
            # TODO: pyxlsb
            xlrd
            xlsxwriter
          ];
          feather = [ pyarrow ];
          fss = [ fsspec ];
          gcp = [
            gcsfs
            # TODO: pandas-gbq
          ];
          hdf5 = [
            blosc2
            tables
          ];
          html = [
            beautifulsoup4
            html5lib
            lxml
          ];
          mysql = [
            sqlalchemy
            pymysql
          ];
          output_formatting = [
            jinja2
            tabulate
          ];
          parquet = [ pyarrow ];
          performance = [
            bottleneck
            numba
            numexpr
          ];
          plot = [ matplotlib ];
          postgresql = [
            sqlalchemy
            psycopg2
          ];
          spss = [ pyreadstat ];
          sql-other = [ sqlalchemy ];
          xml = [ lxml ];
        };
      in
      extras // { all = lib.concatLists (lib.attrValues extras); };

    doCheck = false; # various infinite recursions

    # The test suite runs as a separate derivation instead of in the main build.
    passthru.tests.pytest = pandas.overridePythonAttrs (_: {
      doCheck = true;
    });

    # Tests are run with every optional dependency group available.
    nativeCheckInputs =
      [
        glibcLocales
        hypothesis
        pytest-asyncio
        pytest-xdist
        pytestCheckHook
      ]
      ++ lib.flatten (lib.attrValues passthru.optional-dependencies)
      ++ lib.optionals (stdenv.isLinux) [
        # for locale executable
        glibc
      ]
      ++ lib.optionals (stdenv.isDarwin) [
        # for locale executable
        adv_cmds
      ];

    # don't max out build cores, it breaks tests
    # (a fixed worker count is passed via `--numprocesses` below instead)
    dontUsePytestXdist = true;

    __darwinAllowLocalNetworking = true;

    pytestFlagsArray = [
      # https://github.com/pandas-dev/pandas/blob/main/test_fast.sh
      "-m"
      # NOTE(review): the inner single quotes look deliberate, presumably
      # because the check hook re-parses these flags through the shell; confirm
      # before removing them.
      "'not single_cpu and not slow and not network and not db and not slow_arm'"
      # https://github.com/pandas-dev/pandas/issues/54907
      "--no-strict-data-files"
      "--numprocesses"
      "4"
    ];

    disabledTests =
      [
        # AssertionError: Did not see expected warning of class 'FutureWarning'
        "test_parsing_tzlocal_deprecated"
      ]
      ++ lib.optionals (stdenv.isDarwin && stdenv.isAarch64) [
        # tests/generic/test_finalize.py::test_binops[and_-args4-right] - AssertionError: assert {} == {'a': 1}
        "test_binops"
        # These tests are unreliable on aarch64-darwin. See https://github.com/pandas-dev/pandas/issues/38921.
        "test_rolling"
      ]
      # `optionals` (plural) is required here: `optional` would wrap the list
      # itself and leave a nested list inside disabledTests on 32-bit platforms.
      ++ lib.optionals stdenv.is32bit [
        # https://github.com/pandas-dev/pandas/issues/37398
        "test_rolling_var_numerical_issues"
      ];

    # Tests have relative paths, and need to reference compiled C extensions
    # so change directory where `import .test` is able to be resolved
    preCheck =
      ''
        export HOME=$TMPDIR
        export LC_ALL="en_US.UTF-8"
        cd $out/${python.sitePackages}/pandas
      ''
      # TODO: Get locale and clipboard support working on darwin.
      # Until then, pbcopy/pbpaste are replaced by stub scripts that contain
      # only a shebang line and are put first on PATH.
      + lib.optionalString stdenv.isDarwin ''
        # Fake the impure dependencies pbpaste and pbcopy
        echo "#!${runtimeShell}" > pbcopy
        echo "#!${runtimeShell}" > pbpaste
        chmod a+x pbcopy pbpaste
        export PATH=$(pwd):$PATH
      '';

    pythonImportsCheck = [ "pandas" ];

    meta = {
      # pandas devs no longer test i686, it's commonly broken
      # broken = stdenv.isi686;
      changelog = "https://pandas.pydata.org/docs/whatsnew/index.html";
      description = "Powerful data structures for data analysis, time series, and statistics";
      downloadPage = "https://github.com/pandas-dev/pandas";
      homepage = "https://pandas.pydata.org";
      license = lib.licenses.bsd3;
      longDescription = ''
        Flexible and powerful data analysis / manipulation library for
        Python, providing labeled data structures similar to R data.frame
        objects, statistical functions, and much more.
      '';
      maintainers = with lib.maintainers; [
        raskin
        knedlsepp
      ];
    };
  };
in
pandas