# Nix expression packaging pyiceberg, the Python library from apache/iceberg-python.
#
# The source is fetched from GitHub, built as a pyproject package (cython,
# poetry-core and setuptools form the build system), and checked with pytest.
# The integration test directory and a number of individual tests are disabled;
# the reason for each group is recorded next to it.
{
  lib,
  stdenv,
  buildPythonPackage,
  fetchFromGitHub,

  # build-system
  cython,
  poetry-core,
  setuptools,

  # dependencies
  cachetools,
  click,
  fsspec,
  mmh3,
  pydantic,
  pyparsing,
  ray,
  requests,
  rich,
  sortedcontainers,
  strictyaml,
  tenacity,
  zstandard,

  # optional-dependencies
  adlfs,
  # getdaft,
  duckdb,
  pyarrow,
  boto3,
  gcsfs,
  mypy-boto3-glue,
  thrift,
  pandas,
  s3fs,
  python-snappy,
  psycopg2-binary,
  sqlalchemy,

  # tests
  azure-core,
  azure-storage-blob,
  datafusion,
  fastavro,
  moto,
  pyspark,
  pytestCheckHook,
  pytest-lazy-fixture,
  pytest-mock,
  pytest-timeout,
  requests-mock,
  pythonAtLeast,
}:

buildPythonPackage rec {
  pname = "iceberg-python";
  version = "0.9.1";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "apache";
    repo = "iceberg-python";
    # Upstream release tags carry a "pyiceberg-" prefix.
    tag = "pyiceberg-${version}";
    hash = "sha256-OUj8z/UOIcK0S4tf6Id52YHweNDfYnX6P4nChXrOxqY=";
  };

  patches = [
    # Build script fails to build the cython extension on python 3.11 (no issues with python 3.12):
    # distutils.errors.DistutilsSetupError: each element of 'ext_modules' option must be an Extension instance or 2-tuple
    # This error vanishes if Cython and setuptools imports are swapped
    # https://stackoverflow.com/a/53356077/11196710
    ./reorder-imports-in-build-script.patch
  ];

  build-system = [
    cython
    poetry-core
    setuptools
  ];

  # Prevents the cython build from failing silently
  # NOTE(review): presumably upstream's build script only raises on a failed
  # extension build when this variable is set -- confirm against the build script.
  env.CIBUILDWHEEL = "1";

  # Do not enforce the version constraint that upstream declares for rich.
  pythonRelaxDeps = [
    "rich"
  ];

  dependencies = [
    cachetools
    click
    fsspec
    mmh3
    pydantic
    pyparsing
    ray
    requests
    rich
    sortedcontainers
    strictyaml
    tenacity
    zstandard
  ];

  optional-dependencies = {
    adlfs = [
      adlfs
    ];
    # Left empty: getdaft is not taken as an input (it is commented out in the
    # argument list above).
    daft = [
      # getdaft
    ];
    duckdb = [
      duckdb
      pyarrow
    ];
    dynamodb = [
      boto3
    ];
    gcsfs = [
      gcsfs
    ];
    glue = [
      boto3
      mypy-boto3-glue
    ];
    hive = [
      thrift
    ];
    pandas = [
      pandas
      pyarrow
    ];
    pyarrow = [
      pyarrow
    ];
    ray = [
      pandas
      pyarrow
      ray
    ];
    s3fs = [
      s3fs
    ];
    snappy = [
      python-snappy
    ];
    sql-postgres = [
      psycopg2-binary
      sqlalchemy
    ];
    sql-sqlite = [
      sqlalchemy
    ];
    zstandard = [
      zstandard
    ];
  };

  pythonImportsCheck = [
    "pyiceberg"
    # Compiled avro decoder (cython)
    "pyiceberg.avro.decoder_fast"
  ];

  nativeCheckInputs = [
    azure-core
    azure-storage-blob
    boto3
    datafusion
    fastavro
    moto
    mypy-boto3-glue
    pandas
    pyarrow
    pyspark
    pytest-lazy-fixture
    pytest-mock
    pytest-timeout
    pytestCheckHook
    requests-mock
    s3fs
    sqlalchemy
    thrift
  ]
  # Extra packages required by moto's "server" optional feature.
  ++ moto.optional-dependencies.server;

  pytestFlagsArray = [
    "-W"
    # ResourceWarning: unclosed database in <sqlite3.Connection object at 0x7ffe7c6f4220>
    "ignore::pytest.PytestUnraisableExceptionWarning"
  ];

  disabledTestPaths = [
    # Several errors:
    # - FileNotFoundError: [Errno 2] No such file or directory: '/nix/store/...-python3.12-pyspark-3.5.3/lib/python3.12/site-packages/pyspark/./bin/spark-submit'
    # - requests.exceptions.ConnectionError: HTTPConnectionPool(host='localhost', port=8181): Max retries exceeded with url: /v1/config
    # - thrift.transport.TTransport.TTransportException: Could not connect to any of [('127.0.0.1', 9083)]
    "tests/integration"
  ];

  disabledTests =
    [
      # ModuleNotFoundError: No module named 'puresasl'
      "test_create_hive_client_with_kerberos"
      "test_create_hive_client_with_kerberos_using_context_manager"

      # Require unpackaged pyiceberg_core
      "test_bucket_pyarrow_transforms"
      "test_transform_consistency_with_pyarrow_transform"
      "test_truncate_pyarrow_transforms"

      # botocore.exceptions.EndpointConnectionError: Could not connect to the endpoint URL
      "test_checking_if_a_file_exists"
      "test_closing_a_file"
      "test_fsspec_file_tell"
      "test_fsspec_getting_length_of_file"
      "test_fsspec_pickle_round_trip_s3"
      "test_fsspec_raise_on_opening_file_not_found"
      "test_fsspec_read_specified_bytes_for_file"
      "test_fsspec_write_and_read_file"
      "test_writing_avro_file"

      # Require gcsfs, which is not part of nativeCheckInputs
      "test_fsspec_converting_an_outputfile_to_an_inputfile_gcs"
      "test_fsspec_new_input_file_gcs"
      "test_fsspec_new_output_file_gcs"
      "test_fsspec_pickle_roundtrip_gcs"

      # Timeout (network access)
      "test_fsspec_converting_an_outputfile_to_an_inputfile_adls"
      "test_fsspec_new_abfss_output_file_adls"
      "test_fsspec_new_input_file_adls"
      "test_fsspec_pickle_round_trip_aldfs"
      "test_partitioned_write"
      "test_token_200_w_oauth2_server_uri"

      # Hangs forever (from tests/io/test_pyarrow.py)
      "test_getting_length_of_file_gcs"
    ]
    ++ lib.optionals stdenv.hostPlatform.isDarwin [
      # ImportError: The pyarrow installation is not built with support for 'GcsFileSystem'
      "test_converting_an_outputfile_to_an_inputfile_gcs"
      "test_new_input_file_gcs"
      "test_new_output_file_gcs"

      # PermissionError: [Errno 13] Failed to open local file
      # '/tmp/iceberg/warehouse/default.db/test_projection_partitions/metadata/00000-6c1c61a1-495f-45d3-903d-a2643431be91.metadata.json'
      "test_identity_transform_column_projection"
      "test_identity_transform_columns_projection"
    ]
    ++ lib.optionals (pythonAtLeast "3.13") [
      # AssertionError:
      # assert "Incompatible with StructProtocol: <class 'str'>" in "Unable to initialize struct: <class 'str'>"
      "test_read_not_struct_type"
    ];

  # NOTE(review): presumably needed so tests can reach servers on localhost
  # inside the darwin sandbox (e.g. the moto server) -- confirm.
  __darwinAllowLocalNetworking = true;

  meta = {
    description = "Python library for programmatic access to Apache Iceberg";
    homepage = "https://github.com/apache/iceberg-python";
    changelog = "https://github.com/apache/iceberg-python/releases/tag/pyiceberg-${version}";
    license = lib.licenses.asl20;
    maintainers = with lib.maintainers; [ GaetanLepage ];
  };
}