# Nix derivation for the `pandas` Python package, built from the upstream
# GitHub source with meson-python. The function takes its dependencies as
# arguments and returns the `pandas` package.
{
  lib,
  stdenv,
  buildPythonPackage,
  fetchFromGitHub,
  fetchpatch,
  pythonOlder,

  # build-system
  cython,
  meson-python,
  meson,
  pkg-config,
  versioneer,
  wheel,

  # propagates
  numpy,
  python-dateutil,
  pytz,
  tzdata,

  # optionals
  beautifulsoup4,
  bottleneck,
  blosc2,
  fsspec,
  gcsfs,
  html5lib,
  jinja2,
  lxml,
  matplotlib,
  numba,
  numexpr,
  odfpy,
  openpyxl,
  psycopg2,
  pyarrow,
  pymysql,
  pyqt5,
  pyreadstat,
  qtpy,
  s3fs,
  scipy,
  sqlalchemy,
  tables,
  tabulate,
  xarray,
  xlrd,
  xlsxwriter,
  zstandard,

  # tests
  adv_cmds,
  glibc,
  hypothesis,
  pytestCheckHook,
  pytest-xdist,
  pytest-asyncio,
  python,
  runtimeShell,
}:

let
  # The package is bound to a name so that `passthru.tests.pytest` below can
  # refer back to it and override `doCheck`.
  pandas = buildPythonPackage rec {
    pname = "pandas";
    version = "2.2.3";
    pyproject = true;

    disabled = pythonOlder "3.9";

    src = fetchFromGitHub {
      owner = "pandas-dev";
      repo = "pandas";
      tag = "v${version}";
      hash = "sha256-6YUROcqOV2P1AbJF9IMBIqTt7/PSTeXDwGgE4uI9GME=";
    };

    patches = [
      # Upstream commit fetched as a patch; going by its name it addresses
      # musl builds.
      (fetchpatch {
        name = "musl.patch";
        url = "https://github.com/pandas-dev/pandas/commit/1e487982ff7501f07e2bba7a7d924fb92b3d5c7f.patch";
        hash = "sha256-F1pVce1W951Ea82Ux198e5fBFH6kDOG+EeslDTYbjio=";
      })
    ];

    # A NOTE regarding the Numpy version relaxing: Both Numpy versions 1.x &
    # 2.x are supported. However, upstream wants to always build with Numpy 2,
    # while still being able to run with either Numpy 1 or 2. We insist on
    # performing this substitution even though python3.pkgs.numpy is version 2
    # nowadays, because our ecosystem unfortunately doesn't allow easily
    # separating runtime and build-system dependencies. See also:
    #
    # https://discourse.nixos.org/t/several-comments-about-priorities-and-new-policies-in-the-python-ecosystem/51790
    #
    # Being able to build (& run) with Numpy 1 helps Python environments that
    # globally override the `numpy` attribute to point to `numpy_1`.
    #
    # The exact pins on meson-python and meson are relaxed to lower bounds in
    # the same pass.
    postPatch = ''
      substituteInPlace pyproject.toml \
        --replace-fail "numpy>=2.0" numpy \
        --replace-fail "meson-python==0.13.1" "meson-python>=0.13.1" \
        --replace-fail "meson==1.2.1" "meson>=1.2.1"
    '';

    nativeBuildInputs = [
      cython
      meson-python
      meson
      numpy
      pkg-config
      versioneer
      wheel
    ]
    ++ versioneer.optional-dependencies.toml;

    enableParallelBuilding = true;

    propagatedBuildInputs = [
      numpy
      python-dateutil
      pytz
      tzdata
    ];

    # Extras grouped by feature; `all` is the concatenation of every group.
    optional-dependencies =
      let
        extras = {
          aws = [ s3fs ];
          clipboard = [
            pyqt5
            qtpy
          ];
          compression = [ zstandard ];
          computation = [
            scipy
            xarray
          ];
          excel = [
            odfpy
            openpyxl
            # TODO: pyxlsb
            xlrd
            xlsxwriter
          ];
          feather = [ pyarrow ];
          fss = [ fsspec ];
          gcp = [
            gcsfs
            # TODO: pandas-gbq
          ];
          hdf5 = [
            blosc2
            tables
          ];
          html = [
            beautifulsoup4
            html5lib
            lxml
          ];
          mysql = [
            sqlalchemy
            pymysql
          ];
          output_formatting = [
            jinja2
            tabulate
          ];
          parquet = [ pyarrow ];
          performance = [
            bottleneck
            numba
            numexpr
          ];
          plot = [ matplotlib ];
          postgresql = [
            sqlalchemy
            psycopg2
          ];
          spss = [ pyreadstat ];
          sql-other = [ sqlalchemy ];
          xml = [ lxml ];
        };
      in
      extras // { all = lib.concatLists (lib.attrValues extras); };

    # Tests are off for the main package and run via `passthru.tests.pytest`
    # instead.
    doCheck = false; # various infinite recursions

    passthru.tests.pytest = pandas.overridePythonAttrs (_: {
      doCheck = true;
    });

    nativeCheckInputs = [
      hypothesis
      pytest-asyncio
      pytest-xdist
      pytestCheckHook
    ]
    ++ lib.flatten (lib.attrValues optional-dependencies)
    ++ lib.optionals stdenv.hostPlatform.isLinux [
      # for locale executable
      glibc
    ]
    ++ lib.optionals stdenv.hostPlatform.isDarwin [
      # for locale executable
      adv_cmds
    ];

    # don't max out build cores, it breaks tests; a fixed worker count is
    # passed through `--numprocesses` below instead
    dontUsePytestXdist = true;

    __darwinAllowLocalNetworking = true;

    pytestFlagsArray = [
      # https://github.com/pandas-dev/pandas/blob/main/test_fast.sh
      "-m"
      "'not single_cpu and not slow and not network and not db and not slow_arm'"
      # https://github.com/pandas-dev/pandas/issues/54907
      "--no-strict-data-files"
      "--numprocesses"
      "4"
    ];

    disabledTests = [
      # AssertionError: Did not see expected warning of class 'FutureWarning'
      "test_parsing_tzlocal_deprecated"
    ]
    ++ lib.optionals (stdenv.hostPlatform.isDarwin && stdenv.hostPlatform.isAarch64) [
      # tests/generic/test_finalize.py::test_binops[and_-args4-right] - AssertionError: assert {} == {'a': 1}
      "test_binops"
      # These tests are unreliable on aarch64-darwin. See https://github.com/pandas-dev/pandas/issues/38921.
      "test_rolling"
    ]
    # `lib.optionals` (plural) is required here: `lib.optional` would wrap the
    # list in another list and produce a nested list element.
    ++ lib.optionals stdenv.hostPlatform.is32bit [
      # https://github.com/pandas-dev/pandas/issues/37398
      "test_rolling_var_numerical_issues"
    ];

    # Tests have relative paths and need to reference the compiled C
    # extensions, so change to the installed package directory, where
    # `import .test` can be resolved.
    preCheck = ''
      export HOME=$TMPDIR
      cd $out/${python.sitePackages}/pandas
    ''
    # TODO: Get locale and clipboard support working on darwin.
    # Until then, stub out the impure clipboard tools by putting fake
    # `pbcopy`/`pbpaste` scripts on PATH.
    + lib.optionalString stdenv.hostPlatform.isDarwin ''
      # Fake the impure dependencies pbpaste and pbcopy
      echo "#!${runtimeShell}" > pbcopy
      echo "#!${runtimeShell}" > pbpaste
      chmod a+x pbcopy pbpaste
      export PATH=$(pwd):$PATH
    '';

    pythonImportsCheck = [ "pandas" ];

    meta = {
      # pandas devs no longer test i686, it's commonly broken
      # broken = stdenv.hostPlatform.isi686;
      changelog = "https://pandas.pydata.org/docs/whatsnew/index.html";
      description = "Powerful data structures for data analysis, time series, and statistics";
      downloadPage = "https://github.com/pandas-dev/pandas";
      homepage = "https://pandas.pydata.org";
      license = lib.licenses.bsd3;
      longDescription = ''
        Flexible and powerful data analysis / manipulation library for
        Python, providing labeled data structures similar to R data.frame
        objects, statistical functions, and much more.
      '';
      maintainers = with lib.maintainers; [
        raskin
      ];
    };
  };
in
pandas