# Package definition for pandas, built from the upstream GitHub sources with
# the meson-python build backend.
#
# The test suite is switched off for the main build (`doCheck = false`) and is
# instead exposed as `passthru.tests.pytest`, which re-enables it on an
# overridden copy of this same derivation.
{
  lib,
  stdenv,
  buildPythonPackage,
  fetchFromGitHub,
  pythonAtLeast,
  pythonOlder,

  # build-system
  cython,
  meson-python,
  meson,
  oldest-supported-numpy,
  pkg-config,
  versioneer,
  wheel,

  # propagated runtime dependencies
  numpy,
  python-dateutil,
  pytz,
  tzdata,

  # optional dependencies (see passthru.optional-dependencies)
  beautifulsoup4,
  bottleneck,
  blosc2,
  fsspec,
  gcsfs,
  html5lib,
  jinja2,
  lxml,
  matplotlib,
  numba,
  numexpr,
  odfpy,
  openpyxl,
  psycopg2,
  pyarrow,
  pymysql,
  pyqt5,
  pyreadstat,
  qtpy,
  s3fs,
  scipy,
  sqlalchemy,
  tables,
  tabulate,
  xarray,
  xlrd,
  xlsxwriter,
  zstandard,

  # tests
  adv_cmds,
  glibc,
  glibcLocales,
  hypothesis,
  pytestCheckHook,
  pytest-xdist,
  pytest-asyncio,
  python,
  runtimeShell,
}:

let
  # Bound to a name so that `passthru.tests.pytest` below can refer back to
  # the finished derivation.
  pandas = buildPythonPackage rec {
    pname = "pandas";
    version = "2.2.2";
    pyproject = true;

    disabled = pythonOlder "3.9";

    src = fetchFromGitHub {
      owner = "pandas-dev";
      repo = "pandas";
      rev = "refs/tags/v${version}";
      hash = "sha256-+zQKrsJmP3FJeOiYwNH1u96+/ECDHQF39evzur3cKjc=";
    };

    # Relax the exact (`==`) build-requirement pins in pyproject.toml to lower
    # bounds and drop the numpy version constraint, so the versions supplied
    # through nativeBuildInputs are accepted.
    postPatch = ''
      substituteInPlace pyproject.toml \
        --replace-fail "Cython==3.0.5" "Cython>=3.0.5" \
        --replace-fail "meson-python==0.13.1" "meson-python>=0.13.1" \
        --replace-fail "meson==1.2.1" "meson>=1.2.1" \
        --replace-fail "numpy>=2.0.0rc1" "numpy"
    '';

    nativeBuildInputs =
      [
        cython
        meson-python
        meson
        numpy
        pkg-config
        versioneer
        wheel
      ]
      ++ versioneer.optional-dependencies.toml
      ++ lib.optionals (pythonOlder "3.12") [ oldest-supported-numpy ];

    enableParallelBuilding = true;

    propagatedBuildInputs = [
      numpy
      python-dateutil
      pytz
      tzdata
    ];

    # One attribute per optional-dependency group, plus a synthetic `all`
    # group that concatenates every other group.
    passthru.optional-dependencies =
      let
        extras = {
          aws = [ s3fs ];
          clipboard = [
            pyqt5
            qtpy
          ];
          compression = [ zstandard ];
          computation = [
            scipy
            xarray
          ];
          excel = [
            odfpy
            openpyxl
            # TODO: pyxlsb
            xlrd
            xlsxwriter
          ];
          feather = [ pyarrow ];
          fss = [ fsspec ];
          gcp = [
            gcsfs
            # TODO: pandas-gbq
          ];
          hdf5 = [
            blosc2
            tables
          ];
          html = [
            beautifulsoup4
            html5lib
            lxml
          ];
          mysql = [
            sqlalchemy
            pymysql
          ];
          output_formatting = [
            jinja2
            tabulate
          ];
          parquet = [ pyarrow ];
          performance = [
            bottleneck
            numba
            numexpr
          ];
          plot = [ matplotlib ];
          postgresql = [
            sqlalchemy
            psycopg2
          ];
          spss = [ pyreadstat ];
          sql-other = [ sqlalchemy ];
          xml = [ lxml ];
        };
      in
      extras // { all = lib.concatLists (lib.attrValues extras); };

    # Tests are disabled in the main build: various infinite recursions.
    # NOTE(review): presumably because some of the optional dependencies used
    # as check inputs depend on pandas themselves — confirm.
    doCheck = false;

    # The test suite runs as a separate derivation: this package with the
    # check phase switched back on.
    passthru.tests.pytest = pandas.overridePythonAttrs (_: {
      doCheck = true;
    });

    # Every optional-dependency group is pulled in for the tests. Because the
    # `all` group is flattened alongside the individual groups, each package
    # appears in this list more than once.
    nativeCheckInputs =
      [
        glibcLocales
        hypothesis
        pytest-asyncio
        pytest-xdist
        pytestCheckHook
      ]
      ++ lib.flatten (lib.attrValues passthru.optional-dependencies)
      ++ lib.optionals stdenv.isLinux [
        # for locale executable
        glibc
      ]
      ++ lib.optionals stdenv.isDarwin [
        # for locale executable
        adv_cmds
      ];

    # Don't max out build cores, it breaks tests. A fixed worker count is
    # passed explicitly via `--numprocesses` in pytestFlagsArray instead.
    dontUsePytestXdist = true;

    __darwinAllowLocalNetworking = true;

    pytestFlagsArray = [
      # Marker selection taken from upstream's fast test script:
      # https://github.com/pandas-dev/pandas/blob/main/test_fast.sh
      # NOTE(review): the inner single quotes presumably keep the marker
      # expression together as one shell word — confirm against
      # pytestCheckHook.
      "-m"
      "'not single_cpu and not slow and not network and not db and not slow_arm'"
      # https://github.com/pandas-dev/pandas/issues/54907
      "--no-strict-data-files"
      "--numprocesses"
      "4"
    ];

    disabledTests =
      [
        # AssertionError: Did not see expected warning of class 'FutureWarning'
        "test_parsing_tzlocal_deprecated"
      ]
      ++ lib.optionals (stdenv.isDarwin && stdenv.isAarch64) [
        # tests/generic/test_finalize.py::test_binops[and_-args4-right] - AssertionError: assert {} == {'a': 1}
        "test_binops"
        # These tests are unreliable on aarch64-darwin. See https://github.com/pandas-dev/pandas/issues/38921.
        "test_rolling"
      ]
      # `lib.optionals` (plural) because the second argument is a list;
      # `lib.optional` would wrap it again and append a nested list.
      ++ lib.optionals stdenv.is32bit [
        # https://github.com/pandas-dev/pandas/issues/37398
        "test_rolling_var_numerical_issues"
      ];

    # Tests have relative paths, and need to reference compiled C extensions
    # so change directory where `import .test` is able to be resolved
    preCheck =
      ''
        export HOME=$TMPDIR
        export LC_ALL="en_US.UTF-8"
        cd $out/${python.sitePackages}/pandas
      ''
      # TODO: Get locale and clipboard support working on darwin.
      #       Until then the clipboard helpers are replaced by empty stubs.
      + lib.optionalString stdenv.isDarwin ''
        # Fake the impure dependencies pbpaste and pbcopy
        echo "#!${runtimeShell}" > pbcopy
        echo "#!${runtimeShell}" > pbpaste
        chmod a+x pbcopy pbpaste
        export PATH=$(pwd):$PATH
      '';

    pythonImportsCheck = [ "pandas" ];

    meta = {
      # pandas devs no longer test i686, it's commonly broken
      # broken = stdenv.isi686;
      changelog = "https://pandas.pydata.org/docs/whatsnew/index.html";
      description = "Powerful data structures for data analysis, time series, and statistics";
      downloadPage = "https://github.com/pandas-dev/pandas";
      homepage = "https://pandas.pydata.org";
      license = lib.licenses.bsd3;
      longDescription = ''
        Flexible and powerful data analysis / manipulation library for
        Python, providing labeled data structures similar to R data.frame
        objects, statistical functions, and much more.
      '';
      maintainers = with lib.maintainers; [
        raskin
        knedlsepp
      ];
    };
  };
in
pandas