# Builds the Apache Beam Python SDK (`apache_beam`) from the `sdks/python`
# subdirectory of the upstream GitHub repository.
{
  lib,
  stdenv,
  buildPythonPackage,
  fetchFromGitHub,

  # build-system
  cython,
  distlib,
  grpcio-tools,
  jinja2,
  jsonpickle,
  jsonschema,
  mypy-protobuf,
  redis,
  setuptools,
  yapf,

  # dependencies
  cloudpickle,
  crcmod,
  dill,
  fastavro,
  fasteners,
  grpcio,
  hdfs,
  httplib2,
  numpy,
  objsize,
  orjson,
  proto-plus,
  protobuf,
  pyarrow,
  pydot,
  pymongo,
  python-dateutil,
  pytz,
  regex,
  requests,
  typing-extensions,
  zstandard,

  # tests
  python,
  docstring-parser,
  freezegun,
  hypothesis,
  mock,
  pandas,
  parameterized,
  psycopg2,
  pyhamcrest,
  pytest-xdist,
  pytestCheckHook,
  pyyaml,
  requests-mock,
  scikit-learn,
  sqlalchemy,
  tenacity,
  testcontainers,
  pythonAtLeast,
}:

buildPythonPackage rec {
  pname = "apache-beam";
  version = "2.63.0";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "apache";
    repo = "beam";
    tag = "v${version}";
    hash = "sha256-ixJstawgU3UGtNKVzkwMCLkdY7QKTbxNe6JJ7vG+vmA=";
  };

  # Runtime requirements whose upstream version constraints are relaxed.
  pythonRelaxDeps = [
    "grpcio"
    "jsonpickle"

    # As of apache-beam v2.55.1, the requirement is cloudpickle~=2.2.1, but
    # the current (2024-04-20) nixpkgs's cloudpickle version is 3.0.0.
    "cloudpickle"

    # See https://github.com/NixOS/nixpkgs/issues/156957
    "dill"

    "protobuf"

    # As of apache-beam v2.45.0, the requirement is pyarrow<10.0.0,>=0.15.1, but
    # the current (2023-02-22) nixpkgs's pyarrow version is 11.0.0.
    "pyarrow"

    "pydot"
  ];

  # The Python SDK lives in a subdirectory of the upstream repository.
  sourceRoot = "${src.name}/sdks/python";

  build-system = [
    cython
    distlib
    grpcio-tools
    jinja2
    jsonpickle
    jsonschema
    mypy-protobuf
    redis
    setuptools
    yapf
  ];

  dependencies = [
    cloudpickle
    crcmod
    dill
    fastavro
    fasteners
    grpcio
    hdfs
    httplib2
    numpy
    objsize
    orjson
    proto-plus
    protobuf
    pyarrow
    pydot
    pymongo
    python-dateutil
    pytz
    regex
    requests
    typing-extensions
    zstandard
  ];

  enableParallelBuilding = true;

  # Strip the version pins from the requirements listed in pyproject.toml so
  # that the versions supplied through `build-system` are accepted, and remove
  # the `copy_tests_from_docs()` call from setup.py.
  # NOTE(review): the leading whitespace of the setup.py pattern is part of the
  # matched text; verify it against the indentation of the call in upstream's
  # setup.py.
  postPatch = ''
    substituteInPlace pyproject.toml \
      --replace-fail "distlib==0.3.7" "distlib" \
      --replace-fail "yapf==0.29.0" "yapf" \
      --replace-fail "grpcio-tools==1.62.1" "grpcio-tools" \
      --replace-fail "mypy-protobuf==3.5.0" "mypy-protobuf" \
      --replace-fail "numpy>=1.14.3,<2.3.0" "numpy"

    substituteInPlace setup.py \
      --replace-fail " copy_tests_from_docs()" ""
  '';

  __darwinAllowLocalNetworking = true;

  pythonImportsCheck = [ "apache_beam" ];

  nativeCheckInputs = [
    docstring-parser
    freezegun
    hypothesis
    mock
    pandas
    parameterized
    psycopg2
    pyhamcrest
    pytest-xdist
    pytestCheckHook
    pyyaml
    requests-mock
    scikit-learn
    sqlalchemy
    tenacity
    testcontainers
  ];

  # Make sure we're running the tests for the actually installed
  # package, so that cython's .so files are available.
  preCheck = ''
    cd $out/${python.sitePackages}
  '';

  disabledTestPaths =
    [
      # Fails with
      # _______ ERROR collecting apache_beam/io/external/xlang_jdbcio_it_test.py _______
      # apache_beam/io/external/xlang_jdbcio_it_test.py:80: in <module>
      # class CrossLanguageJdbcIOTest(unittest.TestCase):
      # apache_beam/io/external/xlang_jdbcio_it_test.py:99: in CrossLanguageJdbcIOTest
      # container_init: Callable[[], Union[PostgresContainer, MySqlContainer]],
      # E NameError: name 'MySqlContainer' is not defined
      #
      "apache_beam/io/external/xlang_jdbcio_it_test.py"

      # These tests depend on the availability of specific server backends.
      "apache_beam/runners/portability/flink_runner_test.py"
      "apache_beam/runners/portability/samza_runner_test.py"
      "apache_beam/runners/portability/spark_runner_test.py"

      # Fails starting from dill 0.3.6 because it tries to pickle pytest globals:
      # https://github.com/uqfoundation/dill/issues/482#issuecomment-1139017499.
      "apache_beam/transforms/window_test.py"

      # See https://github.com/apache/beam/issues/25390.
      "apache_beam/coders/slow_coders_test.py"
      "apache_beam/dataframe/pandas_doctests_test.py"
      "apache_beam/typehints/typed_pipeline_test.py"
      "apache_beam/coders/fast_coders_test.py"
      "apache_beam/dataframe/schemas_test.py"

      # Fails with TypeError: cannot pickle 'EncodedFile' instances
      # Upstream issue https://github.com/apache/beam/issues/33889
      "apache_beam/options/pipeline_options_validator_test.py"
      "apache_beam/yaml/main_test.py"
      "apache_beam/yaml/programming_guide_test.py"
      "apache_beam/yaml/readme_test.py"
      "apache_beam/yaml/yaml_combine_test.py"
      "apache_beam/yaml/yaml_enrichment_test.py"
      "apache_beam/yaml/yaml_io_test.py"
      "apache_beam/yaml/yaml_join_test.py"
      "apache_beam/yaml/yaml_mapping_test.py"
      "apache_beam/yaml/yaml_ml_test.py"
      "apache_beam/yaml/yaml_provider_unit_test.py"

      # FIXME All of these fail due to a single AttributeError: 'MaybeReshuffle' object has no attribute 'side_inputs'
      # Upstream issue https://github.com/apache/beam/issues/33854
      "apache_beam/coders/row_coder_test.py"
      "apache_beam/examples/avro_nyc_trips_test.py"
      "apache_beam/examples/complete/autocomplete_test.py"
      "apache_beam/examples/complete/estimate_pi_test.py"
      "apache_beam/examples/complete/game/game_stats_test.py"
      "apache_beam/examples/complete/game/hourly_team_score_test.py"
      "apache_beam/examples/complete/game/leader_board_test.py"
      "apache_beam/examples/complete/game/user_score_test.py"
      "apache_beam/examples/complete/tfidf_test.py"
      "apache_beam/examples/complete/top_wikipedia_sessions_test.py"
      "apache_beam/examples/cookbook/bigquery_side_input_test.py"
      "apache_beam/examples/cookbook/bigquery_tornadoes_test.py"
      "apache_beam/examples/cookbook/coders_test.py"
      "apache_beam/examples/cookbook/combiners_test.py"
      "apache_beam/examples/cookbook/custom_ptransform_test.py"
      "apache_beam/examples/cookbook/filters_test.py"
      "apache_beam/examples/matrix_power_test.py"
      "apache_beam/examples/snippets/snippets_test.py"
      "apache_beam/examples/snippets/transforms/aggregation/approximatequantiles_test.py"
      "apache_beam/examples/snippets/transforms/aggregation/approximateunique_test.py"
      "apache_beam/examples/snippets/transforms/aggregation/batchelements_test.py"
      "apache_beam/examples/snippets/transforms/aggregation/cogroupbykey_test.py"
      "apache_beam/examples/snippets/transforms/aggregation/combineglobally_test.py"
      "apache_beam/examples/snippets/transforms/aggregation/combineperkey_test.py"
      "apache_beam/examples/snippets/transforms/aggregation/combinevalues_test.py"
      "apache_beam/examples/snippets/transforms/aggregation/count_test.py"
      "apache_beam/examples/snippets/transforms/aggregation/distinct_test.py"
      "apache_beam/examples/snippets/transforms/aggregation/groupbykey_test.py"
      "apache_beam/examples/snippets/transforms/aggregation/groupintobatches_test.py"
      "apache_beam/examples/snippets/transforms/aggregation/latest_test.py"
      "apache_beam/examples/snippets/transforms/aggregation/max_test.py"
      "apache_beam/examples/snippets/transforms/aggregation/mean_test.py"
      "apache_beam/examples/snippets/transforms/aggregation/min_test.py"
      "apache_beam/examples/snippets/transforms/aggregation/sample_test.py"
      "apache_beam/examples/snippets/transforms/aggregation/sum_test.py"
      "apache_beam/examples/snippets/transforms/aggregation/tolist_test.py"
      "apache_beam/examples/snippets/transforms/aggregation/top_test.py"
      "apache_beam/examples/snippets/transforms/elementwise/filter_test.py"
      "apache_beam/examples/snippets/transforms/elementwise/flatmap_test.py"
      "apache_beam/examples/snippets/transforms/elementwise/keys_test.py"
      "apache_beam/examples/snippets/transforms/elementwise/kvswap_test.py"
      "apache_beam/examples/snippets/transforms/elementwise/map_test.py"
      "apache_beam/examples/snippets/transforms/elementwise/pardo_test.py"
      "apache_beam/examples/snippets/transforms/elementwise/partition_test.py"
      "apache_beam/examples/snippets/transforms/elementwise/regex_test.py"
      "apache_beam/examples/snippets/transforms/elementwise/tostring_test.py"
      "apache_beam/examples/snippets/transforms/elementwise/values_test.py"
      "apache_beam/examples/snippets/transforms/elementwise/withtimestamps_test.py"
      "apache_beam/examples/snippets/transforms/other/create_test.py"
      "apache_beam/examples/snippets/transforms/other/flatten_test.py"
      "apache_beam/examples/snippets/transforms/other/window_test.py"
      "apache_beam/examples/snippets/util_test.py"
      "apache_beam/io/avroio_test.py"
      "apache_beam/io/concat_source_test.py"
      "apache_beam/io/filebasedsink_test.py"
      "apache_beam/io/filebasedsource_test.py"
      "apache_beam/io/fileio_test.py"
      "apache_beam/io/mongodbio_test.py"
      "apache_beam/io/parquetio_test.py"
      "apache_beam/io/sources_test.py"
      "apache_beam/io/textio_test.py"
      "apache_beam/io/tfrecordio_test.py"
      "apache_beam/metrics/metric_test.py"
      "apache_beam/ml/inference/base_test.py"
      "apache_beam/ml/inference/sklearn_inference_test.py"
      "apache_beam/ml/inference/utils_test.py"
      "apache_beam/ml/rag/chunking/base_test.py"
      "apache_beam/ml/rag/ingestion/base_test.py"
      "apache_beam/pipeline_test.py"
      "apache_beam/runners/direct/direct_runner_test.py"
      "apache_beam/runners/direct/sdf_direct_runner_test.py"
      "apache_beam/runners/interactive/interactive_beam_test.py"
      "apache_beam/runners/interactive/interactive_runner_test.py"
      "apache_beam/runners/interactive/non_interactive_runner_test.py"
      "apache_beam/runners/interactive/recording_manager_test.py"
      "apache_beam/runners/portability/fn_api_runner/translations_test.py"
      "apache_beam/runners/portability/fn_api_runner/trigger_manager_test.py"
      "apache_beam/runners/portability/stager_test.py"
      "apache_beam/testing/synthetic_pipeline_test.py"
      "apache_beam/testing/test_stream_test.py"
      "apache_beam/testing/util_test.py"
      "apache_beam/transforms/combiners_test.py"
      "apache_beam/transforms/core_test.py"
      "apache_beam/transforms/create_test.py"
      "apache_beam/transforms/deduplicate_test.py"
      "apache_beam/transforms/periodicsequence_test.py"
      "apache_beam/transforms/ptransform_test.py"
      "apache_beam/transforms/sideinputs_test.py"
      "apache_beam/transforms/stats_test.py"
      "apache_beam/transforms/transforms_keyword_only_args_test.py"
      "apache_beam/transforms/trigger_test.py"
      "apache_beam/transforms/userstate_test.py"
      "apache_beam/transforms/util_test.py"
      "apache_beam/transforms/write_ptransform_test.py"

      # FIXME AttributeError: 'Namespace' object has no attribute 'test_pipeline_options'
      # Upstream issue https://github.com/apache/beam/issues/33853
      "apache_beam/runners/portability/prism_runner_test.py"

      # FIXME ValueError: Unable to run pipeline with requirement: unsupported_requirement
      # Upstream issue https://github.com/apache/beam/issues/33853
      "apache_beam/yaml/yaml_transform_scope_test.py"
      "apache_beam/yaml/yaml_transform_test.py"
      "apache_beam/yaml/yaml_transform_unit_test.py"
      "apache_beam/yaml/yaml_udf_test.py"
      "apache_beam/dataframe/frames_test.py"

      # FIXME Those tests do not terminate due to a grpc error (threading issue)
      # grpc_status:14, grpc_message:"Cancelling all calls"}"
      # Upstream issue https://github.com/apache/beam/issues/33851
      "apache_beam/runners/portability/portable_runner_test.py"
    ]
    ++ lib.optionals (pythonAtLeast "3.13") [
      # > instruction = ofs_table[pc]
      # E KeyError: 18
      "apache_beam/typehints/trivial_inference_test.py"
    ];

  disabledTests =
    lib.optionals stdenv.hostPlatform.isDarwin [
      # PermissionError: [Errno 13] Permission denied: '/tmp/...'
      "test_cache_manager_uses_local_ib_cache_root"
      "test_describe_all_recordings"
      "test_find_out_correct_user_pipeline"
      "test_get_cache_manager_creates_cache_manager_if_absent"
      "test_streaming_cache_uses_local_ib_cache_root"
      "test_track_user_pipeline_cleanup_non_inspectable_pipeline"
    ]
    ++ lib.optionals (pythonAtLeast "3.12") [
      # TypeError: Could not determine schema for type hint Any.
      "test_batching_beam_row_input"
      "test_auto_convert"
      "test_unbatching_series"
      "test_batching_beam_row_to_dataframe"

      # AssertionError: Any != <class 'int'>
      "test_pycallable_map"
      "testAlwaysReturnsEarly"

      # TypeError: Expected Iterator in return type annotation
      "test_get_output_batch_type"
    ];

  meta = {
    description = "Unified model for defining both batch and streaming data-parallel processing pipelines";
    homepage = "https://beam.apache.org/";
    changelog = "https://github.com/apache/beam/blob/release-${version}/CHANGES.md";
    license = lib.licenses.asl20;
    maintainers = with lib.maintainers; [ ndl ];
  };
}