# Package expression for pandera, built with buildPythonPackage from the
# GitHub sources of unionai-oss/pandera.
#
# Every argument below is supplied by the caller; they are grouped by the role
# they play in the derivation (build backend, runtime dependencies, optional
# extras, and test-only inputs).
{
  lib,
  stdenv,
  buildPythonPackage,
  fetchFromGitHub,

  # build backend
  setuptools,
  setuptools-scm,

  # runtime dependencies
  numpy,
  packaging,
  pandas,
  pydantic,
  typeguard,
  typing-inspect,

  # packages backing the optional extras
  black,
  dask,
  fastapi,
  geopandas,
  hypothesis,
  pandas-stubs,
  polars,
  pyyaml,
  scipy,
  shapely,

  # test-only inputs
  joblib,
  pyarrow,
  pytestCheckHook,
  pytest-asyncio,
  pythonAtLeast,
}:

buildPythonPackage rec {
  pname = "pandera";
  version = "0.23.1";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "unionai-oss";
    repo = "pandera";
    tag = "v${version}";
    hash = "sha256-aKyuOA/N5QPv6NoN6OFNSFMuN4+8XMpglVtoDFDJZBs=";
  };

  build-system = [
    setuptools
    setuptools-scm
  ];

  # Hand setuptools-scm the package version explicitly through the environment.
  env = {
    SETUPTOOLS_SCM_PRETEND_VERSION = version;
  };

  dependencies = [
    numpy
    packaging
    pandas
    pydantic
    typeguard
    typing-inspect
  ];

  # One attribute per extra, plus a synthetic `all` extra that is the
  # concatenation of every extra's package list.
  optional-dependencies =
    let
      # dask together with the packages of its own `dataframe` extra.
      daskWithDataframe = [ dask ] ++ dask.optional-dependencies.dataframe;

      extraGroups = {
        strategies = [ hypothesis ];
        hypotheses = [ scipy ];
        io = [
          pyyaml
          black
          # frictionless is omitted: not in nixpkgs
        ];
        dask = daskWithDataframe;
        mypy = [ pandas-stubs ];
        fastapi = [ fastapi ];
        geopandas = [
          geopandas
          shapely
        ];
        polars = [ polars ];

        # Extras that are intentionally left out:
        #
        # - pyspark: the pyspark expression does not define
        #   optional-dependencies.connect, so this cannot be written as
        #     pyspark = [ pyspark ] ++ pyspark.optional-dependencies.connect;
        #
        # - modin, modin-ray, modin-dask: modin is not in nixpkgs. They would be
        #     modin = [ modin ray ] ++ daskWithDataframe;
        #     modin-ray = [ modin ray ];
        #     modin-dask = [ modin ] ++ daskWithDataframe;
      };
    in
    extraGroups
    // {
      all = builtins.concatLists (builtins.attrValues extraGroups);
    };

  # The test suite is run with every packaged extra available.
  nativeCheckInputs = [
    pytestCheckHook
    pytest-asyncio
    joblib
    pyarrow
  ]
  ++ optional-dependencies.all;

  pytestFlagsArray = [
    # Both of these fail with: KeyError: 'dask'
    "--deselect=tests/dask/test_dask.py::test_series_schema"
    "--deselect=tests/dask/test_dask_accessor.py::test_dataframe_series_add_schema"
  ];

  disabledTestPaths = [
    # tries to access network
    "tests/fastapi/test_app.py"
    # tests doc generation, requires sphinx
    "tests/core/test_docs_setting_column_widths.py"
    # requires modin, not in nixpkgs
    "tests/modin"
    # some typing failures
    "tests/mypy/test_static_type_checking.py"
    # requires spark
    "tests/pyspark"
  ];

  # Individual tests skipped only on specific platforms / Python versions.
  disabledTests =
    let
      darwinFailures = [
        # OOM error on ofborg
        "test_engine_geometry_coerce_crs"
        # pandera.errors.SchemaError: Error while coercing 'geometry' to type geometry
        "test_schema_dtype_crs_with_coerce"
      ];

      python313Failures = [
        # AssertionError: assert DataType(Sparse[float64, nan]) == DataType(Sparse[float64, nan])
        "test_legacy_default_pandas_extension_dtype"
      ];
    in
    lib.optionals stdenv.hostPlatform.isDarwin darwinFailures
    ++ lib.optionals (pythonAtLeast "3.13") python313Failures;

  pythonImportsCheck = [
    "pandera"
    "pandera.api"
    "pandera.config"
    "pandera.dtypes"
    "pandera.engines"
  ];

  meta = {
    description = "Light-weight, flexible, and expressive statistical data testing library";
    homepage = "https://pandera.readthedocs.io";
    changelog = "https://github.com/unionai-oss/pandera/releases/tag/v${version}";
    license = lib.licenses.mit;
    maintainers = [ lib.maintainers.bcdarwin ];
  };
}