{
  lib,
  stdenv,
  buildPythonPackage,
  fetchFromGitHub,

  # build-system
  cython,
  poetry-core,
  setuptools,

  # dependencies
  cachetools,
  click,
  fsspec,
  mmh3,
  pydantic,
  pyparsing,
  ray,
  requests,
  rich,
  sortedcontainers,
  strictyaml,
  tenacity,
  zstandard,

  # optional-dependencies
  adlfs,
  # getdaft,
  duckdb,
  pyarrow,
  boto3,
  gcsfs,
  mypy-boto3-glue,
  thrift,
  pandas,
  s3fs,
  python-snappy,
  psycopg2-binary,
  sqlalchemy,

  # tests
  azure-core,
  azure-storage-blob,
  datafusion,
  fastavro,
  moto,
  pyspark,
  pytestCheckHook,
  pytest-lazy-fixture,
  pytest-mock,
  pytest-timeout,
  requests-mock,
  pythonAtLeast,
}:

buildPythonPackage rec {
  pname = "iceberg-python";
  version = "0.9.1";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "apache";
    repo = "iceberg-python";
    tag = "pyiceberg-${version}";
    hash = "sha256-OUj8z/UOIcK0S4tf6Id52YHweNDfYnX6P4nChXrOxqY=";
  };

  patches = [
    # Build script fails to build the cython extension on python 3.11 (no issues with python 3.12):
    # distutils.errors.DistutilsSetupError: each element of 'ext_modules' option must be an Extension instance or 2-tuple
    # This error vanishes if Cython and setuptools imports are swapped
    # https://stackoverflow.com/a/53356077/11196710
    ./reorder-imports-in-build-script.patch
  ];

  build-system = [
    cython
    poetry-core
    setuptools
  ];

  # Prevents the cython build from failing silently
  env.CIBUILDWHEEL = "1";

  pythonRelaxDeps = [
    "rich"
  ];

  dependencies = [
    cachetools
    click
    fsspec
    mmh3
    pydantic
    pyparsing
    ray
    requests
    rich
    sortedcontainers
    strictyaml
    tenacity
    zstandard
  ];

  optional-dependencies = {
    adlfs = [
      adlfs
    ];
    # Intentionally empty: getdaft is commented out in the function arguments above
    daft = [
      # getdaft
    ];
    duckdb = [
      duckdb
      pyarrow
    ];
    dynamodb = [
      boto3
    ];
    gcsfs = [
      gcsfs
    ];
    glue = [
      boto3
      mypy-boto3-glue
    ];
    hive = [
      thrift
    ];
    pandas = [
      pandas
      pyarrow
    ];
    pyarrow = [
      pyarrow
    ];
    ray = [
      pandas
      pyarrow
      ray
    ];
    s3fs = [
      s3fs
    ];
    snappy = [
      python-snappy
    ];
    sql-postgres = [
      psycopg2-binary
      sqlalchemy
    ];
    sql-sqlite = [
      sqlalchemy
    ];
    zstandard = [
      zstandard
    ];
  };

  pythonImportsCheck = [
    "pyiceberg"
    # Compiled avro decoder (cython)
    "pyiceberg.avro.decoder_fast"
  ];

  nativeCheckInputs = [
    azure-core
    azure-storage-blob
    boto3
    datafusion
    fastavro
    moto
    mypy-boto3-glue
    pandas
    pyarrow
    pyspark
    pytest-lazy-fixture
    pytest-mock
    pytest-timeout
    pytestCheckHook
    requests-mock
    s3fs
    sqlalchemy
    thrift
  ] ++ moto.optional-dependencies.server;

  pytestFlagsArray = [
    "-W"
    # ResourceWarning: unclosed database in <sqlite3.Connection object at 0x7ffe7c6f4220>
    "ignore::pytest.PytestUnraisableExceptionWarning"
  ];

  disabledTestPaths = [
    # Several errors:
    # - FileNotFoundError: [Errno 2] No such file or directory: '/nix/store/...-python3.12-pyspark-3.5.3/lib/python3.12/site-packages/pyspark/./bin/spark-submit'
    # - requests.exceptions.ConnectionError: HTTPConnectionPool(host='localhost', port=8181): Max retries exceeded with url: /v1/config
    # - thrift.transport.TTransport.TTransportException: Could not connect to any of [('127.0.0.1', 9083)]
    "tests/integration"
  ];

  disabledTests =
    [
      # ModuleNotFoundError: No module named 'puresasl'
      "test_create_hive_client_with_kerberos"
      "test_create_hive_client_with_kerberos_using_context_manager"

      # Require unpackaged pyiceberg_core
      "test_bucket_pyarrow_transforms"
      "test_transform_consistency_with_pyarrow_transform"
      "test_truncate_pyarrow_transforms"

      # botocore.exceptions.EndpointConnectionError: Could not connect to the endpoint URL
      "test_checking_if_a_file_exists"
      "test_closing_a_file"
      "test_fsspec_file_tell"
      "test_fsspec_getting_length_of_file"
      "test_fsspec_pickle_round_trip_s3"
      "test_fsspec_raise_on_opening_file_not_found"
      "test_fsspec_read_specified_bytes_for_file"
      "test_fsspec_write_and_read_file"
      "test_writing_avro_file"

      # Require gcsfs, which is not part of nativeCheckInputs
      "test_fsspec_converting_an_outputfile_to_an_inputfile_gcs"
      "test_fsspec_new_input_file_gcs"
      "test_fsspec_new_output_file_gcs"
      "test_fsspec_pickle_roundtrip_gcs"

      # Timeout (network access)
      "test_fsspec_converting_an_outputfile_to_an_inputfile_adls"
      "test_fsspec_new_abfss_output_file_adls"
      "test_fsspec_new_input_file_adls"
      "test_fsspec_pickle_round_trip_aldfs"
      "test_partitioned_write"
      "test_token_200_w_oauth2_server_uri"

      # Hangs forever (from tests/io/test_pyarrow.py)
      "test_getting_length_of_file_gcs"
    ]
    ++ lib.optionals stdenv.hostPlatform.isDarwin [
      # ImportError: The pyarrow installation is not built with support for 'GcsFileSystem'
      "test_converting_an_outputfile_to_an_inputfile_gcs"
      "test_new_input_file_gcs"
      "test_new_output_file_gcs"

      # PermissionError: [Errno 13] Failed to open local file
      # '/tmp/iceberg/warehouse/default.db/test_projection_partitions/metadata/00000-6c1c61a1-495f-45d3-903d-a2643431be91.metadata.json'
      "test_identity_transform_column_projection"
      "test_identity_transform_columns_projection"
    ]
    ++ lib.optionals (pythonAtLeast "3.13") [
      # AssertionError:
      # assert "Incompatible with StructProtocol: <class 'str'>" in "Unable to initialize struct: <class 'str'>"
      "test_read_not_struct_type"
    ];

  # Allow local networking in the Darwin sandbox
  # NOTE(review): presumably needed by tests that talk to a local moto server - confirm
  __darwinAllowLocalNetworking = true;

  meta = {
    description = "Python library for programmatic access to Apache Iceberg";
    homepage = "https://github.com/apache/iceberg-python";
    changelog = "https://github.com/apache/iceberg-python/releases/tag/pyiceberg-${version}";
    license = lib.licenses.asl20;
    maintainers = with lib.maintainers; [ GaetanLepage ];
  };
}