nixpkgs mirror (for testing)
github.com/NixOS/nixpkgs
nix
# Arguments of the polars package expression. Every name listed here must be
# supplied by the caller unless it has a `?` default.
{
  lib,
  stdenv,
  build,
  buildPythonPackage,
  cargo,
  cmake,
  fetchFromGitHub,
  pkg-config,
  # The whole package set: the derivation takes `zstd` from it (see
  # buildInputs) because the plain `zstd` name is hidden by
  # python3Packages.zstd.
  pkgs,
  pytestCheckHook,
  # Optional: defaults to null because the package is not in Nixpkgs.
  pytest-codspeed ? null,
  pytest-cov-stub,
  pytest-xdist,
  pytest-benchmark,
  rustc,
  rustPlatform,
  runCommand,
  setuptools,
  # The three allocator packages below only feed the defaults of
  # `polarsMemoryAllocator` / `polarsJemalloc`; inside the derivation body the
  # names `jemalloc` and `rust-jemalloc-sys` are shadowed by `throw`s.
  mimalloc,
  jemalloc,
  rust-jemalloc-sys,
  # Allocator package added to buildInputs. The default is `mimalloc`;
  # `polarsJemalloc` (defined next) is the jemalloc-based alternative.
  polarsMemoryAllocator ? mimalloc,
  # `rust-jemalloc-sys` rebuilt on top of a jemalloc with
  # `disableInitExecTls = true`.
  polarsJemalloc ?
    let
      jemalloc' = rust-jemalloc-sys.override {
        jemalloc = jemalloc.override {
          # Works around:
          #   "libjemalloc.so.2: cannot allocate memory in static TLS block"
          # https://github.com/pola-rs/polars/issues/5401#issuecomment-1300998316
          disableInitExecTls = true;
        };
      };
    in
    # Fail evaluation early if the override did not result in the expected
    # configure flag.
    assert builtins.elem "--disable-initial-exec-tls" jemalloc'.configureFlags;
    jemalloc',

  # `polars` is referenced by the passthru tests and `python` provides
  # `withPackages` for their environments.
  # NOTE(review): presumably `polars` is this very package as exposed by the
  # Python package set - confirm.
  polars,
  python,
}:
42
let
  # Poison the raw allocator arguments for the rest of the expression: any
  # accidental reference aborts evaluation and points at the supported knob,
  # `polarsMemoryAllocator`. (Nix is lazy, so these only fire when used.)
  jemalloc = throw "polars: use polarsMemoryAllocator over jemalloc";
  rust-jemalloc-sys = throw "polars: use polarsMemoryAllocator over rust-jemalloc-sys";

  # Release being packaged; kept outside the `rec` set and inherited into it.
  version = "1.36.1";
in
50
51buildPythonPackage rec {
# --- Identity -------------------------------------------------------------
# `version` is the value bound in the enclosing `let`.
pname = "polars";
inherit version;
pyproject = true;

# --- Sources ----------------------------------------------------------------
# Upstream repository at the `py-<version>` tag.
src = fetchFromGitHub {
  hash = "sha256-0usMg/xQZOzrLf2gIfNFtzj96cYVzq5gFaKTFLqyfK0=";
  owner = "pola-rs";
  repo = "polars";
  tag = "py-${version}";
};

# Vendored crates for the Rust part, fetched from the same `src`.
cargoDeps = rustPlatform.fetchCargoVendor {
  hash = "sha256-20AI4AGSxnmYitQjfwTFwxMBplEqvN4WXPFdoqJBgSg=";
  inherit pname version src;
};

# Only schedule on builders that advertise the "big-parallel" feature.
requiredSystemFeatures = [ "big-parallel" ];

# --- Build inputs -----------------------------------------------------------
build-system = [ setuptools build ];

nativeBuildInputs =
  let
    # Pull the hooks out of `rustPlatform` once; the list below keeps the
    # original ordering.
    inherit (rustPlatform)
      cargoSetupHook
      cargoBuildHook
      cargoInstallHook
      maturinBuildHook
      ;
  in
  [
    cargo
    pkg-config
    cmake # libz-ng-sys
    cargoSetupHook
    cargoBuildHook
    cargoInstallHook
    maturinBuildHook
    rustc
  ];

buildInputs = [
  polarsMemoryAllocator
  # Take zstd from the top-level package set: prefer the spliced set when the
  # attribute exists there, otherwise fall back to `pkgs.zstd`.
  (pkgs.__splicedPackages.zstd or pkgs.zstd)
];
90
# Environment variables exported to the build.
env = {
  # NOTE(review): presumably read by the `zstd-sys` crate to link the zstd
  # from buildInputs (located via pkg-config) instead of building its own copy.
  ZSTD_SYS_USE_PKG_CONFIG = true;

  # https://github.com/NixOS/nixpkgs/blob/5c38beb516f8da3a823d94b746dd3bf3c6b9bbd7/doc/languages-frameworks/rust.section.md#using-community-maintained-rust-toolchains-using-community-maintained-rust-toolchains
  # https://discourse.nixos.org/t/nixpkgs-rustplatform-and-nightly/22870
  RUSTC_BOOTSTRAP = true;

  # Pass `--cfg use_mimalloc` only when the selected allocator is mimalloc;
  # for any other allocator RUSTFLAGS is the empty string.
  # `lib.getName` is used instead of `.pname` so that an allocator derivation
  # defining only `name` still evaluates; for derivations that do have
  # `pname` the result is identical.
  RUSTFLAGS = lib.concatStringsSep " " (
    lib.optionals (lib.getName polarsMemoryAllocator == "mimalloc") [
      "--cfg use_mimalloc"
    ]
  );
  RUST_BACKTRACE = true;
};
105
# cmake is listed in nativeBuildInputs (for libz-ng-sys); skip its configure
# step for this package.
dontUseCmakeConfigure = true;

# Point maturin at the runtime crate's manifest.
maturinBuildFlags = [ "-m" "py-polars/runtime/polars-runtime-32/Cargo.toml" ];

# Two wheels are needed: maturin produces `_polars_runtime_32`, and the
# pure-python `polars` wheel is built here, before the main build, into dist/.
preBuild = ''
  pyproject-build --no-isolation --outdir dist/ --wheel py-polars
'';

# The runtime-deps check fails on the polars -> polars-runtime-32 dependency
# between the two wheels, so it is turned off.
dontCheckRuntimeDeps = true;

pythonImportsCheck = [ "polars" ];
124
# Import-order smoke tests: load pyarrow and polars in one interpreter, in
# both orders, with the dynamic loader's `LD_DEBUG=libs` trace written to $out.

# Order 1: pyarrow first, then polars.
passthru.tests.dynloading-1 =
  let
    pythonEnv = python.withPackages (pyPkgs: [
      pyPkgs.pyarrow
      polars
    ]);
  in
  runCommand "polars-dynloading-1" { nativeBuildInputs = [ pythonEnv ]; } ''
    ((LD_DEBUG=libs python) |& tee $out | tail) << \EOF
    import pyarrow
    import polars
    EOF
    touch $out
  '';

# Order 2: polars first, then pyarrow. On failure the captured trace is
# echoed to stderr by `failureHook`.
passthru.tests.dynloading-2 =
  let
    pythonEnv = python.withPackages (pyPkgs: [
      pyPkgs.pyarrow
      polars
    ]);
  in
  runCommand "polars-dynloading-2"
    {
      nativeBuildInputs = [ pythonEnv ];
      failureHook = ''
        sed "s/^/ /" $out >&2
      '';
    }
    ''
      ((LD_DEBUG=libs python) |& tee $out | tail) << \EOF
      import polars
      import pyarrow
      EOF
    '';
# Runs upstream's py-polars test suite against the already-built `polars`
# package. Nothing is compiled here: the source tree is stripped down to the
# tests and the derivation output is an empty marker file.
passthru.tests.pytest = stdenv.mkDerivation {
  pname = "${polars.pname}-pytest";

  inherit (polars) version src;

  requiredSystemFeatures = [ "big-parallel" ];

  # Derive the directory from the same `src` that is inherited above, so the
  # two cannot diverge if `polars` was overridden with different sources.
  sourceRoot = "${polars.src.name}/py-polars";

  # Keep only `tests/` and `pyproject.toml`, then drop stale bytecode, so the
  # tests import the installed package rather than the source checkout.
  postPatch = ''
    for f in * ; do
      [[ "$f" == "tests" ]] || \
      [[ "$f" == "pyproject.toml" ]] || \
      rm -rf "$f"
    done
    for pat in "__pycache__" "*.pyc" ; do
      find -iname "$pat" -exec rm "{}" ";"
    done
  '';
  dontConfigure = true;
  dontBuild = true;

  doCheck = true;
  checkPhase = "pytestCheckPhase";

  # Python environment containing the package under test plus the optional
  # integrations exercised by the test suite.
  nativeBuildInputs = [
    (python.withPackages (ps: [
      polars
      ps.aiosqlite
      ps.altair
      ps.boto3
      ps.deltalake
      ps.fastexcel
      ps.flask
      ps.flask-cors
      ps.fsspec
      ps.gevent
      ps.hypothesis
      ps.jax
      ps.jaxlib
      (ps.kuzu or null) # tolerated as missing from the package set
      ps.matplotlib
      ps.moto
      ps.nest-asyncio
      ps.numpy
      ps.openpyxl
      ps.orjson
      ps.pandas
      ps.pyarrow
      ps.pydantic
      ps.pyiceberg
      ps.sqlalchemy
      ps.torch
      ps.xlsx2csv
      ps.xlsxwriter
      ps.zstandard
      ps.cloudpickle
    ]))
  ];
  nativeCheckInputs = [
    pytestCheckHook
    pytest-codspeed # null unless the caller provides it
    pytest-cov-stub
    pytest-xdist
    pytest-benchmark
  ];

  pytestFlags = [
    "--benchmark-disable"
    "-nauto"
    "--dist=loadgroup"
  ];

  # Each test name is listed once; the previous list repeated several of the
  # adbc entries.
  disabledTests = [
    "test_read_kuzu_graph_database" # kuzu
    "test_read_database_cx_credentials" # connectorx

    # adbc_driver_.*
    "test_write_database_append_replace"
    "test_write_database_create"
    "test_write_database_create_quoted_tablename"
    "test_write_database_adbc_temporary_table"
    "test_write_database_errors"

    # Internet access:
    "test_read_web_file"
    "test_run_python_snippets"

    # AssertionError: Series are different (exact value mismatch)
    "test_reproducible_hash_with_seeds"

    # AssertionError: assert 'PARTITIONED FORCE SPILLED' in 'OOC sort forced\nOOC sort started\nRUN STREAMING PIPELINE\n[df -> sort -> ordered_sink]\nfinished sinking into OOC so... sort took: 365.662µs\nstarted sort source phase\nsort source phase took: 2.169915ms\nfull ooc sort took: 4.502947ms\n'
    "test_streaming_sort"

    # AssertionError assert sys.getrefcount(foos[0]) == base_count (3 == 2)
    # tests/unit/dataframe/test_df.py::test_extension
    "test_extension"

    # Internet access (https://bucket.s3.amazonaws.com/)
    "test_scan_credential_provider"
    "test_scan_credential_provider_serialization"

    # Only connecting to localhost, but http URL scheme is disallowed
    "test_scan_delta_loads_aws_profile_endpoint_url"

    # ModuleNotFoundError: ADBC 'adbc_driver_sqlite.dbapi' driver not detected.
    "test_read_database"
    "test_read_database_parameterised_uri"

    # Untriaged
    "test_async_index_error_25209"
    "test_parquet_schema_correctness"
  ];
  disabledTestPaths = [
    "tests/benchmark"
    "tests/docs"

    # Internet access
    "tests/unit/io/cloud/test_credential_provider.py"

    # adbc
    "tests/unit/io/database/test_read.py"

    # Requires pydantic 2.12
    "tests/unit/io/test_iceberg.py"
  ];

  # The derivation exists only for its check phase; emit an empty output.
  installPhase = "touch $out";
};
291
# Package metadata.
meta = {
  description = "Dataframes powered by a multithreaded, vectorized query engine, written in Rust";
  homepage = "https://github.com/pola-rs/polars";
  # Release notes for the exact tag being packaged.
  changelog = "https://github.com/pola-rs/polars/releases/tag/py-${version}";
  license = lib.licenses.mit;
  mainProgram = "polars";
  platforms = lib.platforms.all;
  # Spelled out as attribute paths instead of `with lib.maintainers;`.
  maintainers = [
    lib.maintainers.happysalada
    lib.maintainers.SomeoneSerge
  ];
};
304}