Clone of https://github.com/NixOS/nixpkgs.git (to stress-test knotserver)
at gcc-offload 251 lines 5.6 kB view raw
# Nix derivation for pyiceberg (apache/iceberg-python): Python library for
# programmatic access to Apache Iceberg tables.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,

  # build-system
  cython,
  poetry-core,
  setuptools,

  # dependencies
  cachetools,
  click,
  fsspec,
  mmh3,
  pydantic,
  pyparsing,
  ray,
  requests,
  rich,
  sortedcontainers,
  strictyaml,
  tenacity,
  zstandard,

  # optional-dependencies
  adlfs,
  # getdaft,
  duckdb,
  pyarrow,
  boto3,
  gcsfs,
  mypy-boto3-glue,
  thrift,
  pandas,
  s3fs,
  python-snappy,
  psycopg2-binary,
  sqlalchemy,

  # tests
  azure-core,
  azure-storage-blob,
  fastavro,
  moto,
  pyspark,
  pytestCheckHook,
  pytest-lazy-fixture,
  pytest-mock,
  pytest-timeout,
  requests-mock,
  pythonOlder,
}:

buildPythonPackage rec {
  pname = "iceberg-python";
  version = "0.8.1";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "apache";
    repo = "iceberg-python";
    # Upstream tags releases as "pyiceberg-<version>", not plain "<version>".
    tag = "pyiceberg-${version}";
    hash = "sha256-L3YlOtzJv9R4TLeJGzfMQ+0nYtQEsqmgNZpW9B6vVAI=";
  };

  patches = lib.optionals (pythonOlder "3.12") [
    # Build script fails to build the cython extension on python 3.11 (no issues with python 3.12):
    # distutils.errors.DistutilsSetupError: each element of 'ext_modules' option must be an Extension instance or 2-tuple
    # This error vanishes if Cython and setuptools imports are swapped
    # https://stackoverflow.com/a/53356077/11196710
    ./reorder-imports-in-build-script.patch
  ];

  build-system = [
    cython
    poetry-core
    setuptools
  ];

  # Prevent the cython build from failing silently (upstream's build script
  # only treats extension-build errors as fatal when CIBUILDWHEEL is set).
  env.CIBUILDWHEEL = "1";

  dependencies = [
    cachetools
    click
    fsspec
    mmh3
    pydantic
    pyparsing
    ray
    requests
    rich
    sortedcontainers
    strictyaml
    tenacity
    zstandard
  ];

  # Mirrors upstream's [tool.poetry.extras]; each attribute name is the extra
  # as users would request it, e.g. pyiceberg[glue].
  optional-dependencies = {
    adlfs = [
      adlfs
    ];
    daft = [
      # getdaft
    ];
    duckdb = [
      duckdb
      pyarrow
    ];
    dynamodb = [
      boto3
    ];
    gcsfs = [
      gcsfs
    ];
    glue = [
      boto3
      mypy-boto3-glue
    ];
    hive = [
      thrift
    ];
    pandas = [
      pandas
      pyarrow
    ];
    pyarrow = [
      pyarrow
    ];
    ray = [
      pandas
      pyarrow
      ray
    ];
    s3fs = [
      s3fs
    ];
    snappy = [
      python-snappy
    ];
    sql-postgres = [
      psycopg2-binary
      sqlalchemy
    ];
    sql-sqlite = [
      sqlalchemy
    ];
    zstandard = [
      zstandard
    ];
  };

  pythonImportsCheck = [
    "pyiceberg"
    # Compiled avro decoder (cython)
    "pyiceberg.avro.decoder_fast"
  ];

  nativeCheckInputs = [
    azure-core
    azure-storage-blob
    boto3
    fastavro
    moto
    mypy-boto3-glue
    pandas
    pyarrow
    pyspark
    pytest-lazy-fixture
    pytest-mock
    pytest-timeout
    pytestCheckHook
    requests-mock
    s3fs
    sqlalchemy
    thrift
  ] ++ moto.optional-dependencies.server;

  disabledTestPaths = [
    # Several errors:
    # - FileNotFoundError: [Errno 2] No such file or directory: '/nix/store/...-python3.12-pyspark-3.5.3/lib/python3.12/site-packages/pyspark/./bin/spark-submit'
    # - requests.exceptions.ConnectionError: HTTPConnectionPool(host='localhost', port=8181): Max retries exceeded with url: /v1/config
    # - thrift.transport.TTransport.TTransportException: Could not connect to any of [('127.0.0.1', 9083)]
    "tests/integration"
  ];

  disabledTests = [
    # botocore.exceptions.EndpointConnectionError: Could not connect to the endpoint URL
    "test_checking_if_a_file_exists"
    "test_closing_a_file"
    "test_fsspec_file_tell"
    "test_fsspec_getting_length_of_file"
    "test_fsspec_pickle_round_trip_s3"
    "test_fsspec_raise_on_opening_file_not_found"
    "test_fsspec_read_specified_bytes_for_file"
    "test_fsspec_write_and_read_file"
    "test_writing_avro_file"

    # Require unpackaged gcsfs
    # NOTE(review): gcsfs is taken as an input above and wired into
    # optional-dependencies.gcsfs — this comment may be stale; verify whether
    # these tests fail for another reason (e.g. network access).
    "test_fsspec_converting_an_outputfile_to_an_inputfile_gcs"
    "test_fsspec_new_input_file_gcs"
    "test_fsspec_new_output_file_gcs"
    "test_fsspec_pickle_roundtrip_gcs"

    # Timeout (network access)
    "test_fsspec_converting_an_outputfile_to_an_inputfile_adls"
    "test_fsspec_new_abfss_output_file_adls"
    "test_fsspec_new_input_file_adls"
    "test_fsspec_pickle_round_trip_aldfs"

    # TypeError: pyarrow.lib.large_list() takes no keyword argument
    # From tests/io/test_pyarrow_stats.py:
    "test_bounds"
    "test_column_metrics_mode"
    "test_column_sizes"
    "test_metrics_mode_counts"
    "test_metrics_mode_full"
    "test_metrics_mode_non_default_trunc"
    "test_metrics_mode_none"
    "test_null_and_nan_counts"
    "test_offsets"
    "test_read_missing_statistics"
    "test_record_count"
    "test_value_counts"
    "test_write_and_read_stats_schema"
    # From tests/io/test_pyarrow.py:
    "test_list_type_to_pyarrow"
    "test_projection_add_column"
    "test_projection_list_of_structs"
    "test_read_list"
    "test_schema_compatible_missing_nullable_field_nested"
    "test_schema_compatible_nested"
    "test_schema_mismatch_missing_required_field_nested"
    "test_schema_to_pyarrow_schema_exclude_field_ids"
    "test_schema_to_pyarrow_schema_include_field_ids"
    # From tests/io/test_pyarrow_visitor.py
    "test_round_schema_conversion_nested"

    # Hangs forever (from tests/io/test_pyarrow.py)
    "test_getting_length_of_file_gcs"
  ];

  meta = {
    description = "Python library for programmatic access to Apache Iceberg";
    homepage = "https://github.com/apache/iceberg-python";
    changelog = "https://github.com/apache/iceberg-python/releases/tag/pyiceberg-${version}";
    license = lib.licenses.asl20;
    maintainers = with lib.maintainers; [ GaetanLepage ];
  };
}