# Snapshot header from the source viewer: at 25.11-pre, 8.3 kB (not part of the Nix expression).
# polars: Python package whose Rust extension is built from the `pola-rs/polars`
# GitHub repository with maturin and the cargo hooks from `rustPlatform`.
#
# Arguments worth knowing about:
#   pkgs                  - only used to reach the C `zstd` library, because the
#                           name `zstd` is shadowed inside the Python package set.
#   pytest-codspeed       - optional (defaults to null); only used by the pytest
#                           passthru test.
#   polarsMemoryAllocator - allocator package added to `buildInputs`; when its
#                           name is "mimalloc", `--cfg use_mimalloc` is passed to
#                           rustc.
#   polarsJemalloc        - `rust-jemalloc-sys` rebuilt on a jemalloc configured
#                           with `--disable-initial-exec-tls`; offered as an
#                           alternative value for `polarsMemoryAllocator`.
#   polars, python        - the resulting package and its interpreter, used by
#                           the `passthru.tests` derivations.
{
  lib,
  stdenv,
  buildPythonPackage,
  cargo,
  cmake,
  fetchFromGitHub,
  pkg-config,
  pkgs, # zstd hidden by python3Packages.zstd
  pytestCheckHook,
  pytest-codspeed ? null, # Not in Nixpkgs
  pytest-cov,
  pytest-xdist,
  pytest-benchmark,
  rustc,
  rustPlatform,
  runCommand,

  mimalloc,
  jemalloc,
  rust-jemalloc-sys,
  # Allocator used for the build. Defaults to `mimalloc`; `polarsJemalloc`
  # (defined right below) is the alternative.
  polarsMemoryAllocator ? mimalloc, # polarsJemalloc,
  polarsJemalloc ?
    let
      jemalloc' = rust-jemalloc-sys.override {
        jemalloc = jemalloc.override {
          # "libjemalloc.so.2: cannot allocate memory in static TLS block"

          # https://github.com/pola-rs/polars/issues/5401#issuecomment-1300998316
          disableInitExecTls = true;
        };
      };
    in
    # Fail evaluation early if the override did not produce the expected flag.
    assert builtins.elem "--disable-initial-exec-tls" jemalloc'.configureFlags;
    jemalloc',

  polars,
  python,
}:

let
  version = "1.27.1";

  # Hide symbols to prevent accidental use
  rust-jemalloc-sys = throw "polars: use polarsMemoryAllocator over rust-jemalloc-sys";
  jemalloc = throw "polars: use polarsMemoryAllocator over jemalloc";
in

buildPythonPackage rec {
  pname = "polars";
  inherit version;

  src = fetchFromGitHub {
    owner = "pola-rs";
    repo = "polars";
    tag = "py-${version}";
    hash = "sha256-/VigBBjZglPleXB9jhWHtA+y7WixjboVbzslprZ/A98=";
  };

  # Do not type-check assertions because some of them use unstable features (`is_none_or`)
  #
  # The sed expression uses a space as the `s` delimiter: every `debug_assert!`
  # gets a `#[cfg(debug_assertions)]` attribute line inserted before it, at the
  # same indentation.
  postPatch = ''
    while IFS= read -r -d "" path ; do
      sed -i 's \(\s*\)debug_assert! \1#[cfg(debug_assertions)]\n\1debug_assert! ' "$path"
    done < <( find -iname '*.rs' -print0 )
  '';

  cargoDeps = rustPlatform.fetchCargoVendor {
    inherit pname version src;
    hash = "sha256-dbPhEMhfe8DZO1D8U+3W1goNK1TAVyLzXHwXzzRvASw=";
  };

  requiredSystemFeatures = [ "big-parallel" ];

  build-system = [ rustPlatform.maturinBuildHook ];

  nativeBuildInputs = [
    cargo
    pkg-config
    cmake # libz-ng-sys
    rustPlatform.cargoSetupHook
    rustPlatform.cargoBuildHook
    rustPlatform.cargoInstallHook
    rustc
  ];

  buildInputs = [
    polarsMemoryAllocator
    # The C library, not the Python `zstd` module; prefer the spliced package
    # set when it is available.
    (pkgs.__splicedPackages.zstd or pkgs.zstd)
  ];

  env = {
    ZSTD_SYS_USE_PKG_CONFIG = true;

    # https://github.com/NixOS/nixpkgs/blob/5c38beb516f8da3a823d94b746dd3bf3c6b9bbd7/doc/languages-frameworks/rust.section.md#using-community-maintained-rust-toolchains-using-community-maintained-rust-toolchains
    # https://discourse.nixos.org/t/nixpkgs-rustplatform-and-nightly/22870
    RUSTC_BOOTSTRAP = true;

    # Several `debug_assert!` statements use the unstable `Option::is_none_or` method
    RUSTFLAGS = lib.concatStringsSep " " (
      [
        "-Cdebug_assertions=n"
      ]
      # `lib.getName` falls back to parsing `name`, so an override that passes a
      # derivation without `pname` does not abort evaluation here.
      ++ lib.optionals (lib.getName polarsMemoryAllocator == "mimalloc") [
        "--cfg use_mimalloc"
      ]
    );
    RUST_BACKTRACE = true;
  };

  # cmake is only needed as a tool (see nativeBuildInputs); do not run its
  # configure phase.
  dontUseCmakeConfigure = true;

  maturinBuildFlags = [
    "-m"
    "py-polars/Cargo.toml"
  ];

  postInstall = ''
    # Move polars.abi3.so -> polars.so
    local polarsSo=""
    local soName=""
    while IFS= read -r -d "" p ; do
      polarsSo=$p
      soName="$(basename "$polarsSo")"
      [[ "$soName" == polars.so ]] && break
    done < <( find "$out" -iname "polars*.so" -print0 )
    [[ -z "''${polarsSo:-}" ]] && echo "polars.so not found" >&2 && exit 1
    if [[ "$soName" != polars.so ]] ; then
      mv "$polarsSo" "$(dirname "$polarsSo")/polars.so"
    fi
  '';

  pythonImportsCheck = [
    "polars"
  ];

  # Import pyarrow first, then polars, with dynamic-loader tracing enabled; the
  # trace is written to $out.
  passthru.tests.dynloading-1 =
    runCommand "polars-dynloading-1"
      {
        nativeBuildInputs = [
          (python.withPackages (ps: [
            ps.pyarrow
            polars
          ]))
        ];
      }
      ''
        ((LD_DEBUG=libs python) |& tee $out | tail) << \EOF
        import pyarrow
        import polars
        EOF
        touch $out
      '';
  # Same check with the import order reversed; on failure the captured trace is
  # echoed to stderr.
  passthru.tests.dynloading-2 =
    runCommand "polars-dynloading-2"
      {
        nativeBuildInputs = [
          (python.withPackages (ps: [
            ps.pyarrow
            polars
          ]))
        ];
        failureHook = ''
          sed "s/^/    /" $out >&2
        '';
      }
      ''
        ((LD_DEBUG=libs python) |& tee $out | tail) << \EOF
        import polars
        import pyarrow
        EOF
      '';
  # Runs the upstream `py-polars/tests` suite against the built package.
  passthru.tests.pytest = stdenv.mkDerivation {
    pname = "${polars.pname}-pytest";

    inherit (polars) version src;

    requiredSystemFeatures = [ "big-parallel" ];

    sourceRoot = "${src.name}/py-polars";
    # Keep only the test suite and pyproject.toml, and drop stale bytecode.
    postPatch = ''
      for f in * ; do
        [[ "$f" == "tests" ]] || \
        [[ "$f" == "pyproject.toml" ]] || \
        rm -rf "$f"
      done
      for pat in "__pycache__" "*.pyc" ; do
        find -iname "$pat" -exec rm "{}" ";"
      done
    '';
    dontConfigure = true;
    dontBuild = true;

    doCheck = true;
    checkPhase = "pytestCheckPhase";
    nativeBuildInputs = [
      (python.withPackages (ps: [
        polars
        ps.aiosqlite
        ps.altair
        ps.boto3
        ps.deltalake
        ps.fastexcel
        ps.flask
        ps.flask-cors
        ps.fsspec
        ps.gevent
        ps.hypothesis
        ps.jax
        ps.jaxlib
        (ps.kuzu or null)
        ps.matplotlib
        ps.moto
        ps.nest-asyncio
        ps.numpy
        ps.openpyxl
        ps.pandas
        ps.pyarrow
        ps.pydantic
        ps.pyiceberg
        ps.sqlalchemy
        ps.torch
        ps.xlsx2csv
        ps.xlsxwriter
        ps.zstandard
        ps.cloudpickle
      ]))
    ];
    nativeCheckInputs = [
      pytestCheckHook
      pytest-codspeed
      pytest-cov
      pytest-xdist
      pytest-benchmark
    ];

    pytestFlagsArray = [
      "-n auto"
      "--dist loadgroup"
      ''-m "slow or not slow"''
    ];
    disabledTests = [
      "test_read_kuzu_graph_database" # kuzu
      "test_read_database_cx_credentials" # connectorx

      # adbc_driver_.*
      "test_write_database_append_replace"
      "test_write_database_create"
      "test_write_database_create_quoted_tablename"
      "test_write_database_adbc_temporary_table"
      "test_write_database_errors"

      # Internet access:
      "test_read_web_file"
      "test_run_python_snippets"

      # AssertionError: Series are different (exact value mismatch)
      "test_reproducible_hash_with_seeds"

      # AssertionError: assert 'PARTITIONED FORCE SPILLED' in 'OOC sort forced\nOOC sort started\nRUN STREAMING PIPELINE\n[df -> sort -> ordered_sink]\nfinished sinking into OOC so... sort took: 365.662µs\nstarted sort source phase\nsort source phase took: 2.169915ms\nfull ooc sort took: 4.502947ms\n'
      "test_streaming_sort"

      # AssertionError assert sys.getrefcount(foos[0]) == base_count (3 == 2)
      # tests/unit/dataframe/test_df.py::test_extension
      "test_extension"

      # Internet access (https://bucket.s3.amazonaws.com/)
      "test_scan_credential_provider"
      "test_scan_credential_provider_serialization"

      # ModuleNotFoundError: ADBC 'adbc_driver_sqlite.dbapi' driver not detected.
      "test_read_database"
      "test_read_database_parameterised_uri"

      # Untriaged
      "test_pickle_lazyframe_nested_function_udf"
      "test_serde_udf"
      "test_hash_struct"
    ];
    disabledTestPaths = [
      "tests/benchmark"
      "tests/docs"

      # Internet access
      "tests/unit/io/cloud/test_credential_provider.py"

      # Wrong altair version
      "tests/unit/operations/namespaces/test_plot.py"

      # adbc
      "tests/unit/io/database/test_read.py"

      # Untriaged
      "tests/unit/cloud/test_prepare_cloud_plan.py"
      "tests/unit/io/cloud/test_cloud.py"
    ];

    # Nothing to install; the derivation succeeding is the test result.
    installPhase = "touch $out";
  };

  meta = {
    description = "Dataframes powered by a multithreaded, vectorized query engine, written in Rust";
    homepage = "https://github.com/pola-rs/polars";
    changelog = "https://github.com/pola-rs/polars/releases/tag/py-${version}";
    license = lib.licenses.mit;
    maintainers = with lib.maintainers; [
      happysalada
      SomeoneSerge
    ];
    mainProgram = "polars";
    platforms = lib.platforms.all;
  };
}