Clone of https://github.com/NixOS/nixpkgs.git (to stress-test knotserver)
at flake-libs 371 lines 13 kB view raw
# Nix package expression for the Apache Beam Python SDK (`apache-beam`),
# built from the `sdks/python` subdirectory of the apache/beam repository.
{
  lib,
  stdenv,
  buildPythonPackage,
  fetchFromGitHub,

  # build-system
  cython,
  distlib,
  grpcio-tools,
  jinja2,
  jsonpickle,
  jsonschema,
  mypy-protobuf,
  redis,
  setuptools,
  yapf,

  # dependencies
  crcmod,
  dill,
  fastavro,
  fasteners,
  grpcio,
  hdfs,
  httplib2,
  numpy,
  objsize,
  orjson,
  proto-plus,
  protobuf,
  pyarrow,
  pydot,
  pymongo,
  python-dateutil,
  pytz,
  regex,
  requests,
  typing-extensions,
  zstandard,

  # tests
  # (`python` is only used for `python.sitePackages` in preCheck, and
  # `pythonAtLeast` only gates the version-specific disabled tests.)
  python,
  docstring-parser,
  freezegun,
  hypothesis,
  mock,
  pandas,
  parameterized,
  psycopg2,
  pyhamcrest,
  pytest-xdist,
  pytestCheckHook,
  pyyaml,
  requests-mock,
  scikit-learn,
  sqlalchemy,
  tenacity,
  testcontainers,
  pythonAtLeast,
}:

buildPythonPackage rec {
  pname = "apache-beam";
  version = "2.65.0";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "apache";
    repo = "beam";
    tag = "v${version}";
    hash = "sha256-vDW0PVNep+egIZBe4t8IPwLgsQDmoO4rrA4wUoAHzfg=";
  };

  # Runtime requirements whose upstream version constraints are relaxed
  # because they do not match the versions packaged in nixpkgs.
  pythonRelaxDeps = [
    "grpcio"
    "jsonpickle"

    # As of apache-beam v2.55.1, the requirement is cloudpickle~=2.2.1, but
    # the current (2024-04-20) nixpkgs's cloudpickle version is 3.0.0.
    "cloudpickle"

    # See https://github.com/NixOS/nixpkgs/issues/156957
    "dill"

    "protobuf"

    # As of apache-beam v2.45.0, the requirement is pyarrow<10.0.0,>=0.15.1, but
    # the current (2023-02-22) nixpkgs's pyarrow version is 11.0.0.
    "pyarrow"

    "pydot"
  ];

  # The Python SDK lives in a subdirectory of the multi-language repository.
  sourceRoot = "${src.name}/sdks/python";

  build-system = [
    cython
    distlib
    grpcio-tools
    jinja2
    jsonpickle
    jsonschema
    mypy-protobuf
    redis
    setuptools
    yapf
  ];

  dependencies = [
    crcmod
    dill
    fastavro
    fasteners
    grpcio
    hdfs
    httplib2
    numpy
    objsize
    orjson
    proto-plus
    protobuf
    pyarrow
    pydot
    pymongo
    python-dateutil
    pytz
    regex
    requests
    typing-extensions
    zstandard
  ];

  enableParallelBuilding = true;

  # Drop the exact version pins (and the numpy range) on the build
  # requirements in pyproject.toml so that the versions provided through
  # `build-system` are accepted, and remove the `copy_tests_from_docs()`
  # call from setup.py.
  # `--replace-fail` makes the build fail if any of these patterns is no
  # longer present, so the pins must be re-checked on every version bump.
  # NOTE(review): the reason for removing `copy_tests_from_docs()` is not
  # recorded here; presumably the docs it copies from live outside
  # `sourceRoot` -- confirm.
  postPatch = ''
    substituteInPlace pyproject.toml \
      --replace-fail "distlib==0.3.7" "distlib" \
      --replace-fail "yapf==0.29.0" "yapf" \
      --replace-fail "grpcio-tools==1.62.1" "grpcio-tools" \
      --replace-fail "mypy-protobuf==3.5.0" "mypy-protobuf" \
      --replace-fail "numpy>=1.14.3,<2.3.0" "numpy"

    substituteInPlace setup.py \
      --replace-fail "  copy_tests_from_docs()" ""
  '';

  # NOTE(review): presumably needed because some tests open local network
  # connections, which the Darwin sandbox blocks by default -- confirm.
  __darwinAllowLocalNetworking = true;

  pythonImportsCheck = [ "apache_beam" ];

  nativeCheckInputs = [
    docstring-parser
    freezegun
    hypothesis
    mock
    pandas
    parameterized
    psycopg2
    pyhamcrest
    pytest-xdist
    pytestCheckHook
    pyyaml
    requests-mock
    scikit-learn
    sqlalchemy
    tenacity
    testcontainers
  ];

  # Make sure we're running the tests for the actually installed
  # package, so that cython's .so files are available.
  preCheck = ''
    cd $out/${python.sitePackages}
  '';

  # Paths are relative to the installed site-packages directory that
  # preCheck changes into.
  disabledTestPaths =
    [
      # Fails with
      #     _______ ERROR collecting apache_beam/io/external/xlang_jdbcio_it_test.py _______
      #     apache_beam/io/external/xlang_jdbcio_it_test.py:80: in <module>
      #         class CrossLanguageJdbcIOTest(unittest.TestCase):
      #     apache_beam/io/external/xlang_jdbcio_it_test.py:99: in CrossLanguageJdbcIOTest
      #         container_init: Callable[[], Union[PostgresContainer, MySqlContainer]],
      #     E   NameError: name 'MySqlContainer' is not defined
      #
      "apache_beam/io/external/xlang_jdbcio_it_test.py"

      # These tests depend on the availability of specific server backends.
      "apache_beam/runners/portability/flink_runner_test.py"
      "apache_beam/runners/portability/samza_runner_test.py"
      "apache_beam/runners/portability/spark_runner_test.py"

      # Fails starting from dill 0.3.6 because it tries to pickle pytest globals:
      # https://github.com/uqfoundation/dill/issues/482#issuecomment-1139017499.
      "apache_beam/transforms/window_test.py"

      # See https://github.com/apache/beam/issues/25390.
      "apache_beam/coders/slow_coders_test.py"
      "apache_beam/dataframe/pandas_doctests_test.py"
      "apache_beam/typehints/typed_pipeline_test.py"
      "apache_beam/coders/fast_coders_test.py"
      "apache_beam/dataframe/schemas_test.py"

      # Fails with TypeError: cannot pickle 'EncodedFile' instances
      # Upstream issue https://github.com/apache/beam/issues/33889
      "apache_beam/options/pipeline_options_validator_test.py"
      "apache_beam/yaml/main_test.py"
      "apache_beam/yaml/programming_guide_test.py"
      "apache_beam/yaml/readme_test.py"
      "apache_beam/yaml/yaml_combine_test.py"
      "apache_beam/yaml/yaml_enrichment_test.py"
      "apache_beam/yaml/yaml_io_test.py"
      "apache_beam/yaml/yaml_join_test.py"
      "apache_beam/yaml/yaml_mapping_test.py"
      "apache_beam/yaml/yaml_ml_test.py"
      "apache_beam/yaml/yaml_provider_unit_test.py"

      # FIXME: All of these fail due to a single error:
      # AttributeError: 'MaybeReshuffle' object has no attribute 'side_inputs'
      # Upstream issue https://github.com/apache/beam/issues/33854
      "apache_beam/coders/row_coder_test.py"
      "apache_beam/examples/avro_nyc_trips_test.py"
      "apache_beam/examples/complete/autocomplete_test.py"
      "apache_beam/examples/complete/estimate_pi_test.py"
      "apache_beam/examples/complete/game/game_stats_test.py"
      "apache_beam/examples/complete/game/hourly_team_score_test.py"
      "apache_beam/examples/complete/game/leader_board_test.py"
      "apache_beam/examples/complete/game/user_score_test.py"
      "apache_beam/examples/complete/tfidf_test.py"
      "apache_beam/examples/complete/top_wikipedia_sessions_test.py"
      "apache_beam/examples/cookbook/bigquery_side_input_test.py"
      "apache_beam/examples/cookbook/bigquery_tornadoes_test.py"
      "apache_beam/examples/cookbook/coders_test.py"
      "apache_beam/examples/cookbook/combiners_test.py"
      "apache_beam/examples/cookbook/custom_ptransform_test.py"
      "apache_beam/examples/cookbook/filters_test.py"
      "apache_beam/examples/matrix_power_test.py"
      "apache_beam/examples/snippets/snippets_test.py"
      "apache_beam/examples/snippets/transforms/aggregation/approximatequantiles_test.py"
      "apache_beam/examples/snippets/transforms/aggregation/approximateunique_test.py"
      "apache_beam/examples/snippets/transforms/aggregation/batchelements_test.py"
      "apache_beam/examples/snippets/transforms/aggregation/cogroupbykey_test.py"
      "apache_beam/examples/snippets/transforms/aggregation/combineglobally_test.py"
      "apache_beam/examples/snippets/transforms/aggregation/combineperkey_test.py"
      "apache_beam/examples/snippets/transforms/aggregation/combinevalues_test.py"
      "apache_beam/examples/snippets/transforms/aggregation/count_test.py"
      "apache_beam/examples/snippets/transforms/aggregation/distinct_test.py"
      "apache_beam/examples/snippets/transforms/aggregation/groupbykey_test.py"
      "apache_beam/examples/snippets/transforms/aggregation/groupintobatches_test.py"
      "apache_beam/examples/snippets/transforms/aggregation/latest_test.py"
      "apache_beam/examples/snippets/transforms/aggregation/max_test.py"
      "apache_beam/examples/snippets/transforms/aggregation/mean_test.py"
      "apache_beam/examples/snippets/transforms/aggregation/min_test.py"
      "apache_beam/examples/snippets/transforms/aggregation/sample_test.py"
      "apache_beam/examples/snippets/transforms/aggregation/sum_test.py"
      "apache_beam/examples/snippets/transforms/aggregation/tolist_test.py"
      "apache_beam/examples/snippets/transforms/aggregation/top_test.py"
      "apache_beam/examples/snippets/transforms/elementwise/filter_test.py"
      "apache_beam/examples/snippets/transforms/elementwise/flatmap_test.py"
      "apache_beam/examples/snippets/transforms/elementwise/keys_test.py"
      "apache_beam/examples/snippets/transforms/elementwise/kvswap_test.py"
      "apache_beam/examples/snippets/transforms/elementwise/map_test.py"
      "apache_beam/examples/snippets/transforms/elementwise/pardo_test.py"
      "apache_beam/examples/snippets/transforms/elementwise/partition_test.py"
      "apache_beam/examples/snippets/transforms/elementwise/regex_test.py"
      "apache_beam/examples/snippets/transforms/elementwise/tostring_test.py"
      "apache_beam/examples/snippets/transforms/elementwise/values_test.py"
      "apache_beam/examples/snippets/transforms/elementwise/withtimestamps_test.py"
      "apache_beam/examples/snippets/transforms/other/create_test.py"
      "apache_beam/examples/snippets/transforms/other/flatten_test.py"
      "apache_beam/examples/snippets/transforms/other/window_test.py"
      "apache_beam/examples/snippets/util_test.py"
      "apache_beam/io/avroio_test.py"
      "apache_beam/io/concat_source_test.py"
      "apache_beam/io/filebasedsink_test.py"
      "apache_beam/io/filebasedsource_test.py"
      "apache_beam/io/fileio_test.py"
      "apache_beam/io/mongodbio_test.py"
      "apache_beam/io/parquetio_test.py"
      "apache_beam/io/sources_test.py"
      "apache_beam/io/textio_test.py"
      "apache_beam/io/tfrecordio_test.py"
      "apache_beam/metrics/metric_test.py"
      "apache_beam/ml/inference/base_test.py"
      "apache_beam/ml/inference/sklearn_inference_test.py"
      "apache_beam/ml/inference/utils_test.py"
      "apache_beam/ml/rag/chunking/base_test.py"
      "apache_beam/ml/rag/ingestion/base_test.py"
      "apache_beam/pipeline_test.py"
      "apache_beam/runners/direct/direct_runner_test.py"
      "apache_beam/runners/direct/sdf_direct_runner_test.py"
      "apache_beam/runners/interactive/interactive_beam_test.py"
      "apache_beam/runners/interactive/interactive_runner_test.py"
      "apache_beam/runners/interactive/non_interactive_runner_test.py"
      "apache_beam/runners/interactive/recording_manager_test.py"
      "apache_beam/runners/portability/fn_api_runner/translations_test.py"
      "apache_beam/runners/portability/fn_api_runner/trigger_manager_test.py"
      "apache_beam/runners/portability/stager_test.py"
      "apache_beam/testing/synthetic_pipeline_test.py"
      "apache_beam/testing/test_stream_test.py"
      "apache_beam/testing/util_test.py"
      "apache_beam/transforms/combiners_test.py"
      "apache_beam/transforms/core_test.py"
      "apache_beam/transforms/create_test.py"
      "apache_beam/transforms/deduplicate_test.py"
      "apache_beam/transforms/periodicsequence_test.py"
      "apache_beam/transforms/ptransform_test.py"
      "apache_beam/transforms/sideinputs_test.py"
      "apache_beam/transforms/stats_test.py"
      "apache_beam/transforms/transforms_keyword_only_args_test.py"
      "apache_beam/transforms/trigger_test.py"
      "apache_beam/transforms/userstate_test.py"
      "apache_beam/transforms/util_test.py"
      "apache_beam/transforms/write_ptransform_test.py"

      # FIXME AttributeError: 'Namespace' object has no attribute 'test_pipeline_options'
      # Upstream issue https://github.com/apache/beam/issues/33853
      "apache_beam/runners/portability/prism_runner_test.py"

      # FIXME ValueError: Unable to run pipeline with requirement: unsupported_requirement
      # Upstream issue https://github.com/apache/beam/issues/33853
      "apache_beam/yaml/yaml_transform_scope_test.py"
      "apache_beam/yaml/yaml_transform_test.py"
      "apache_beam/yaml/yaml_transform_unit_test.py"
      "apache_beam/yaml/yaml_udf_test.py"
      "apache_beam/dataframe/frames_test.py"

      # FIXME Those tests do not terminate due to a grpc error (threading issue)
      # grpc_status:14, grpc_message:"Cancelling all calls"}"
      # Upstream issue https://github.com/apache/beam/issues/33851
      "apache_beam/runners/portability/portable_runner_test.py"
    ]
    ++ lib.optionals (pythonAtLeast "3.13") [
      # >           instruction = ofs_table[pc]
      # E           KeyError: 18
      "apache_beam/typehints/trivial_inference_test.py"
    ];

  # Individual test names skipped only on the platforms / Python versions
  # where they are known to fail.
  disabledTests =
    lib.optionals stdenv.hostPlatform.isDarwin [
      # PermissionError: [Errno 13] Permission denied: '/tmp/...'
      "test_cache_manager_uses_local_ib_cache_root"
      "test_describe_all_recordings"
      "test_find_out_correct_user_pipeline"
      "test_get_cache_manager_creates_cache_manager_if_absent"
      "test_streaming_cache_uses_local_ib_cache_root"
      "test_track_user_pipeline_cleanup_non_inspectable_pipeline"
    ]
    ++ lib.optionals (pythonAtLeast "3.12") [
      # TypeError: Could not determine schema for type hint Any.
      "test_batching_beam_row_input"
      "test_auto_convert"
      "test_unbatching_series"
      "test_batching_beam_row_to_dataframe"

      # AssertionError: Any != <class 'int'>
      "test_pycallable_map"
      "testAlwaysReturnsEarly"

      # TypeError: Expected Iterator in return type annotation
      "test_get_output_batch_type"
    ];

  meta = {
    description = "Unified model for defining both batch and streaming data-parallel processing pipelines";
    homepage = "https://beam.apache.org/";
    changelog = "https://github.com/apache/beam/blob/release-${version}/CHANGES.md";
    license = lib.licenses.asl20;
    maintainers = with lib.maintainers; [ ndl ];
  };
}