{
  lib,
  stdenv,
  buildPythonPackage,
  fetchFromGitHub,

  # build-system
  setuptools,
  setuptools-scm,

  # dependencies
  numpy,
  packaging,
  pandas,
  pydantic,
  typeguard,
  typing-extensions,
  typing-inspect,

  # optional-dependencies
  black,
  dask,
  fastapi,
  geopandas,
  hypothesis,
  ibis-framework,
  pandas-stubs,
  polars,
  pyyaml,
  scipy,
  shapely,

  # tests
  duckdb,
  joblib,
  pyarrow,
  pyarrow-hotfix,
  pytestCheckHook,
  pytest-asyncio,
  pythonAtLeast,
}:

# Package expression for pandera, built from the tagged GitHub sources with a
# pyproject (setuptools + setuptools-scm) build.
buildPythonPackage rec {
  pname = "pandera";
  version = "0.25.0";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "unionai-oss";
    repo = "pandera";
    tag = "v" + version;
    hash = "sha256-0YeLeGpunjHRWFvSvz0r2BokM4/eJKXuBajgcGquca4=";
  };

  build-system = [
    setuptools
    setuptools-scm
  ];

  # Hand setuptools-scm the version explicitly through its environment variable.
  env = {
    SETUPTOOLS_SCM_PRETEND_VERSION = version;
  };

  dependencies = [
    packaging
    pydantic
    typeguard
    typing-extensions
    typing-inspect
  ];

  # One attribute per extra, plus an aggregate `all` that concatenates the
  # package lists of every extra defined here.
  optional-dependencies =
    let
      inherit (lib) attrValues concatLists;

      # dask itself together with the packages of its `dataframe` extra.
      daskWithDataframe = [ dask ] ++ dask.optional-dependencies.dataframe;

      # Attributes are listed alphabetically; `attrValues` orders by name, so the
      # resulting `all` list does not depend on the order written here.
      extrasByName = {
        dask = daskWithDataframe;
        fastapi = [ fastapi ];
        geopandas = [
          geopandas
          shapely
        ];
        hypotheses = [ scipy ];
        ibis = [
          ibis-framework
          duckdb
        ];
        io = [
          pyyaml
          black
          #frictionless # not in nixpkgs
        ];
        mypy = [ pandas-stubs ];
        pandas = [
          numpy
          pandas
        ];
        polars = [ polars ];
        strategies = [ hypothesis ];

        # Extras that are intentionally left out:
        #
        # The pyspark expression does not define optional-dependencies.connect:
        #pyspark = [ pyspark ] ++ pyspark.optional-dependencies.connect;
        #
        # modin is not in nixpkgs:
        #modin = [
        #  modin
        #  ray
        #] ++ daskWithDataframe;
        #modin-ray = [
        #  modin
        #  ray
        #];
        #modin-dask = [
        #  modin
        #] ++ daskWithDataframe;
      };

      everyExtra = concatLists (attrValues extrasByName);
    in
    extrasByName // { all = everyExtra; };

  # Test-only tooling first, followed by every optional dependency.
  nativeCheckInputs =
    let
      testOnlyInputs = [
        pytestCheckHook
        pytest-asyncio
        joblib
        pyarrow
        pyarrow-hotfix
      ];
    in
    testOnlyInputs ++ optional-dependencies.all;

  disabledTestPaths = [
    # Tries to access the network.
    "tests/fastapi/test_app.py"
    # Tests doc generation, which requires sphinx.
    "tests/pandas/test_docs_setting_column_widths.py"
    # Requires modin, which is not in nixpkgs.
    "tests/modin"
    # Some typing failures.
    "tests/mypy/test_pandas_static_type_checking.py"
    # Requires spark.
    "tests/pyspark"

    # KeyError: 'dask'
    "tests/dask/test_dask.py::test_series_schema"
    "tests/dask/test_dask_accessor.py::test_dataframe_series_add_schema"
  ];

  disabledTests =
    let
      # Skipped only when the host platform is Darwin.
      darwinFailures = [
        # OOM error on ofborg:
        "test_engine_geometry_coerce_crs"
        # pandera.errors.SchemaError: Error while coercing 'geometry' to type geometry
        "test_schema_dtype_crs_with_coerce"
      ];

      # Skipped only on Python 3.13 and newer.
      python313Failures = [
        # AssertionError: assert DataType(Sparse[float64, nan]) == DataType(Sparse[float64, nan])
        "test_legacy_default_pandas_extension_dtype"
      ];
    in
    lib.optionals stdenv.hostPlatform.isDarwin darwinFailures
    ++ lib.optionals (pythonAtLeast "3.13") python313Failures;

  pythonImportsCheck = [
    "pandera"
    "pandera.api"
    "pandera.config"
    "pandera.dtypes"
    "pandera.engines"
  ];

  meta = {
    description = "Light-weight, flexible, and expressive statistical data testing library";
    homepage = "https://pandera.readthedocs.io";
    changelog = "https://github.com/unionai-oss/pandera/releases/tag/${src.tag}";
    license = lib.licenses.mit;
    maintainers = [ lib.maintainers.bcdarwin ];
  };
}