# Function arguments: build helpers, test-only dependencies and the knobs that
# select which memory allocator polars is built against.
{
  lib,
  stdenv,
  buildPythonPackage,
  cargo,
  cmake,
  fetchFromGitHub,
  pkg-config,
  pkgs, # Used to reach the C zstd library: `zstd` is hidden by python3Packages.zstd
  pytestCheckHook,
  pytest-codspeed ? null, # Optional (defaults to null): not in Nixpkgs
  pytest-cov,
  pytest-xdist,
  pytest-benchmark,
  rustc,
  rustPlatform,
  runCommand,

  mimalloc,
  jemalloc,
  rust-jemalloc-sys,
  # Allocator placed in buildInputs. The default is `mimalloc`; the
  # alternative is `polarsJemalloc`, defined just below.
  polarsMemoryAllocator ? mimalloc, # polarsJemalloc,
  # rust-jemalloc-sys rebuilt on top of a jemalloc with initial-exec TLS disabled.
  polarsJemalloc ?
    let
      jemalloc' = rust-jemalloc-sys.override {
        jemalloc = jemalloc.override {
          # Works around the runtime error
          # "libjemalloc.so.2: cannot allocate memory in static TLS block"

          # https://github.com/pola-rs/polars/issues/5401#issuecomment-1300998316
          disableInitExecTls = true;
        };
      };
    in
    # Fail evaluation if the expected configure flag is absent from
    # jemalloc'.configureFlags.
    assert builtins.elem "--disable-initial-exec-tls" jemalloc'.configureFlags;
    jemalloc',

  # The finished polars package and its interpreter, used by passthru.tests.
  polars,
  python,
}:
41
let
  # Shadow the raw allocator arguments: any reference to them from the
  # derivation body aborts evaluation with a pointer to the supported knob.
  jemalloc = throw "polars: use polarsMemoryAllocator over jemalloc";
  rust-jemalloc-sys = throw "polars: use polarsMemoryAllocator over rust-jemalloc-sys";

  # Upstream py-polars release; feeds the source tag and the changelog URL.
  version = "1.27.1";
in
49
50buildPythonPackage rec {
51 pname = "polars";
52 inherit version;
53
54 src = fetchFromGitHub {
55 owner = "pola-rs";
56 repo = "polars";
57 tag = "py-${version}";
58 hash = "sha256-/VigBBjZglPleXB9jhWHtA+y7WixjboVbzslprZ/A98=";
59 };
60
61 # Do not type-check assertions because some of them use unstable features (`is_none_or`)
62 postPatch = ''
63 while IFS= read -r -d "" path ; do
64 sed -i 's \(\s*\)debug_assert! \1#[cfg(debug_assertions)]\n\1debug_assert! ' "$path"
65 done < <( find -iname '*.rs' -print0 )
66 '';
67
68 cargoDeps = rustPlatform.fetchCargoVendor {
69 inherit pname version src;
70 hash = "sha256-dbPhEMhfe8DZO1D8U+3W1goNK1TAVyLzXHwXzzRvASw=";
71 };
72
73 requiredSystemFeatures = [ "big-parallel" ];
74
75 build-system = [ rustPlatform.maturinBuildHook ];
76
77 nativeBuildInputs = [
78 cargo
79 pkg-config
80 cmake # libz-ng-sys
81 rustPlatform.cargoSetupHook
82 rustPlatform.cargoBuildHook
83 rustPlatform.cargoInstallHook
84 rustc
85 ];
86
87 buildInputs = [
88 polarsMemoryAllocator
89 (pkgs.__splicedPackages.zstd or pkgs.zstd)
90 ];
91
92 env = {
93 ZSTD_SYS_USE_PKG_CONFIG = true;
94
95 # https://github.com/NixOS/nixpkgs/blob/5c38beb516f8da3a823d94b746dd3bf3c6b9bbd7/doc/languages-frameworks/rust.section.md#using-community-maintained-rust-toolchains-using-community-maintained-rust-toolchains
96 # https://discourse.nixos.org/t/nixpkgs-rustplatform-and-nightly/22870
97 RUSTC_BOOTSTRAP = true;
98
99 # Several `debug_assert!` statements use the unstable `Option::is_none_or` method
100 RUSTFLAGS = lib.concatStringsSep " " (
101 [
102 "-Cdebug_assertions=n"
103 ]
104 ++ lib.optionals (polarsMemoryAllocator.pname == "mimalloc") [
105 "--cfg use_mimalloc"
106 ]
107 );
108 RUST_BACKTRACE = true;
109 };
110
111 dontUseCmakeConfigure = true;
112
113 maturinBuildFlags = [
114 "-m"
115 "py-polars/Cargo.toml"
116 ];
117
118 postInstall = ''
119 # Move polars.abi3.so -> polars.so
120 local polarsSo=""
121 local soName=""
122 while IFS= read -r -d "" p ; do
123 polarsSo=$p
124 soName="$(basename "$polarsSo")"
125 [[ "$soName" == polars.so ]] && break
126 done < <( find "$out" -iname "polars*.so" -print0 )
127 [[ -z "''${polarsSo:-}" ]] && echo "polars.so not found" >&2 && exit 1
128 if [[ "$soName" != polars.so ]] ; then
129 mv "$polarsSo" "$(dirname "$polarsSo")/polars.so"
130 fi
131 '';
132
133 pythonImportsCheck = [
134 "polars"
135 ];
136
137 passthru.tests.dynloading-1 =
138 runCommand "polars-dynloading-1"
139 {
140 nativeBuildInputs = [
141 (python.withPackages (ps: [
142 ps.pyarrow
143 polars
144 ]))
145 ];
146 }
147 ''
148 ((LD_DEBUG=libs python) |& tee $out | tail) << \EOF
149 import pyarrow
150 import polars
151 EOF
152 touch $out
153 '';
154 passthru.tests.dynloading-2 =
155 runCommand "polars-dynloading-2"
156 {
157 nativeBuildInputs = [
158 (python.withPackages (ps: [
159 ps.pyarrow
160 polars
161 ]))
162 ];
163 failureHook = ''
164 sed "s/^/ /" $out >&2
165 '';
166 }
167 ''
168 ((LD_DEBUG=libs python) |& tee $out | tail) << \EOF
169 import polars
170 import pyarrow
171 EOF
172 '';
173 passthru.tests.pytest = stdenv.mkDerivation {
174 pname = "${polars.pname}-pytest";
175
176 inherit (polars) version src;
177
178 requiredSystemFeatures = [ "big-parallel" ];
179
180 sourceRoot = "${src.name}/py-polars";
181 postPatch = ''
182 for f in * ; do
183 [[ "$f" == "tests" ]] || \
184 [[ "$f" == "pyproject.toml" ]] || \
185 rm -rf "$f"
186 done
187 for pat in "__pycache__" "*.pyc" ; do
188 find -iname "$pat" -exec rm "{}" ";"
189 done
190 '';
191 dontConfigure = true;
192 dontBuild = true;
193
194 doCheck = true;
195 checkPhase = "pytestCheckPhase";
196 nativeBuildInputs = [
197 (python.withPackages (ps: [
198 polars
199 ps.aiosqlite
200 ps.altair
201 ps.boto3
202 ps.deltalake
203 ps.fastexcel
204 ps.flask
205 ps.flask-cors
206 ps.fsspec
207 ps.gevent
208 ps.hypothesis
209 ps.jax
210 ps.jaxlib
211 (ps.kuzu or null)
212 ps.matplotlib
213 ps.moto
214 ps.nest-asyncio
215 ps.numpy
216 ps.openpyxl
217 ps.pandas
218 ps.pyarrow
219 ps.pydantic
220 ps.pyiceberg
221 ps.sqlalchemy
222 ps.torch
223 ps.xlsx2csv
224 ps.xlsxwriter
225 ps.zstandard
226 ps.cloudpickle
227 ]))
228 ];
229 nativeCheckInputs = [
230 pytestCheckHook
231 pytest-codspeed
232 pytest-cov
233 pytest-xdist
234 pytest-benchmark
235 ];
236
237 pytestFlagsArray = [
238 "-n auto"
239 "--dist loadgroup"
240 ''-m "slow or not slow"''
241 ];
242 disabledTests = [
243 "test_read_kuzu_graph_database" # kuzu
244 "test_read_database_cx_credentials" # connectorx
245
246 # adbc_driver_.*
247 "test_write_database_append_replace"
248 "test_write_database_create"
249 "test_write_database_create_quoted_tablename"
250 "test_write_database_adbc_temporary_table"
251 "test_write_database_create"
252 "test_write_database_append_replace"
253 "test_write_database_errors"
254 "test_write_database_errors"
255 "test_write_database_create_quoted_tablename"
256
257 # Internet access:
258 "test_read_web_file"
259 "test_run_python_snippets"
260
261 # AssertionError: Series are different (exact value mismatch)
262 "test_reproducible_hash_with_seeds"
263
264 # AssertionError: assert 'PARTITIONED FORCE SPILLED' in 'OOC sort forced\nOOC sort started\nRUN STREAMING PIPELINE\n[df -> sort -> ordered_sink]\nfinished sinking into OOC so... sort took: 365.662µs\nstarted sort source phase\nsort source phase took: 2.169915ms\nfull ooc sort took: 4.502947ms\n'
265 "test_streaming_sort"
266
267 # AssertionError assert sys.getrefcount(foos[0]) == base_count (3 == 2)
268 # tests/unit/dataframe/test_df.py::test_extension
269 "test_extension"
270
271 # Internet access (https://bucket.s3.amazonaws.com/)
272 "test_scan_credential_provider"
273 "test_scan_credential_provider_serialization"
274
275 # ModuleNotFoundError: ADBC 'adbc_driver_sqlite.dbapi' driver not detected.
276 "test_read_database"
277 "test_read_database_parameterised_uri"
278
279 # Untriaged
280 "test_pickle_lazyframe_nested_function_udf"
281 "test_serde_udf"
282 "test_hash_struct"
283 ];
284 disabledTestPaths = [
285 "tests/benchmark"
286 "tests/docs"
287
288 # Internet access
289 "tests/unit/io/cloud/test_credential_provider.py"
290
291 # Wrong altair version
292 "tests/unit/operations/namespaces/test_plot.py"
293
294 # adbc
295 "tests/unit/io/database/test_read.py"
296
297 # Untriaged
298 "tests/unit/cloud/test_prepare_cloud_plan.py"
299 "tests/unit/io/cloud/test_cloud.py"
300 ];
301
302 installPhase = "touch $out";
303 };
304
305 meta = {
306 description = "Dataframes powered by a multithreaded, vectorized query engine, written in Rust";
307 homepage = "https://github.com/pola-rs/polars";
308 changelog = "https://github.com/pola-rs/polars/releases/tag/py-${version}";
309 license = lib.licenses.mit;
310 maintainers = with lib.maintainers; [
311 happysalada
312 SomeoneSerge
313 ];
314 mainProgram = "polars";
315 platforms = lib.platforms.all;
316 };
317}