Clone of https://github.com/NixOS/nixpkgs.git (to stress-test knotserver)
at r-updates 379 lines 13 kB view raw
# Nix derivation for the Apache Beam Python SDK ("apache-beam"), built from the
# sdks/python subdirectory of the upstream apache/beam repository.
{
  lib,
  stdenv,
  buildPythonPackage,
  fetchFromGitHub,

  # build-system
  cython,
  distlib,
  grpcio-tools,
  jinja2,
  jsonpickle,
  jsonschema,
  mypy-protobuf,
  redis,
  setuptools,
  yapf,

  # dependencies
  crcmod,
  dill,
  fastavro,
  fasteners,
  grpcio,
  hdfs,
  httplib2,
  numpy,
  objsize,
  orjson,
  proto-plus,
  protobuf,
  pyarrow,
  pydot,
  pymongo,
  python-dateutil,
  pytz,
  regex,
  requests,
  typing-extensions,
  zstandard,

  # tests
  python,
  docstring-parser,
  freezegun,
  hypothesis,
  mock,
  pandas,
  parameterized,
  psycopg2,
  pyhamcrest,
  pytest-xdist,
  pytestCheckHook,
  pyyaml,
  requests-mock,
  scikit-learn,
  sqlalchemy,
  tenacity,
  testcontainers,
  pythonAtLeast,
}:

buildPythonPackage rec {
  pname = "apache-beam";
  version = "2.66.0";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "apache";
    repo = "beam";
    tag = "v${version}";
    hash = "sha256-nRofy9pvhO5SUvkIk73ViFm1gPWxEhj1rAUeCVYIpYs=";
  };

  # Upstream version constraints on these packages do not match the versions
  # shipped in nixpkgs, so the constraints are relaxed.
  pythonRelaxDeps = [
    "grpcio"
    "jsonpickle"

    # As of apache-beam v2.55.1, the requirement is cloudpickle~=2.2.1, but
    # the current (2024-04-20) nixpkgs's cloudpickle version is 3.0.0.
    "cloudpickle"

    # See https://github.com/NixOS/nixpkgs/issues/156957
    "dill"

    "numpy"

    "protobuf"

    # As of apache-beam v2.45.0, the requirement is pyarrow<10.0.0,>=0.15.1, but
    # the current (2023-02-22) nixpkgs's pyarrow version is 11.0.0.
    "pyarrow"

    "pydot"
    "redis"
  ];

  # The Python SDK lives in a subdirectory of the upstream repository.
  sourceRoot = "${src.name}/sdks/python";

  build-system = [
    cython
    distlib
    grpcio-tools
    jinja2
    jsonpickle
    jsonschema
    mypy-protobuf
    redis
    setuptools
    yapf
  ];

  dependencies = [
    crcmod
    dill
    fastavro
    fasteners
    grpcio
    hdfs
    httplib2
    numpy
    objsize
    orjson
    proto-plus
    protobuf
    pyarrow
    pydot
    pymongo
    python-dateutil
    pytz
    regex
    requests
    typing-extensions
    zstandard
  ];

  enableParallelBuilding = true;

  # Drop the exact version pins from pyproject.toml so that the versions
  # provided by nixpkgs are accepted, and remove the copy_tests_from_docs()
  # call from setup.py.
  # NOTE(review): the setup.py pattern must keep the call's leading indentation
  # so that the `def copy_tests_from_docs():` line is not matched as well;
  # confirm the exact whitespace against upstream setup.py.
  postPatch = ''
    substituteInPlace pyproject.toml \
      --replace-fail "distlib==0.3.7" "distlib" \
      --replace-fail "yapf==0.29.0" "yapf" \
      --replace-fail "grpcio-tools==1.62.1" "grpcio-tools" \
      --replace-fail "mypy-protobuf==3.5.0" "mypy-protobuf" \
      --replace-fail "numpy>=1.14.3,<2.3.0" "numpy"

    substituteInPlace setup.py \
      --replace-fail "  copy_tests_from_docs()" ""
  '';

  # NOTE(review): presumably required because tests open local network
  # sockets inside the Darwin sandbox; confirm.
  __darwinAllowLocalNetworking = true;

  pythonImportsCheck = [ "apache_beam" ];

  nativeCheckInputs = [
    docstring-parser
    freezegun
    hypothesis
    mock
    pandas
    parameterized
    psycopg2
    pyhamcrest
    pytest-xdist
    pytestCheckHook
    pyyaml
    requests-mock
    scikit-learn
    sqlalchemy
    tenacity
    testcontainers
  ];

  # Make sure we're running the tests for the actually installed
  # package, so that cython's .so files are available.
  preCheck = ''
    cd $out/${python.sitePackages}
  '';

  disabledTestPaths = [
    # Fails with
    #     _______ ERROR collecting apache_beam/io/external/xlang_jdbcio_it_test.py _______
    #     apache_beam/io/external/xlang_jdbcio_it_test.py:80: in <module>
    #         class CrossLanguageJdbcIOTest(unittest.TestCase):
    #     apache_beam/io/external/xlang_jdbcio_it_test.py:99: in CrossLanguageJdbcIOTest
    #         container_init: Callable[[], Union[PostgresContainer, MySqlContainer]],
    #     E   NameError: name 'MySqlContainer' is not defined
    #
    "apache_beam/io/external/xlang_jdbcio_it_test.py"

    # These tests depend on the availability of specific server backends.
    "apache_beam/runners/portability/flink_runner_test.py"
    "apache_beam/runners/portability/samza_runner_test.py"
    "apache_beam/runners/portability/spark_runner_test.py"

    # Fails starting from dill 0.3.6 because it tries to pickle pytest globals:
    # https://github.com/uqfoundation/dill/issues/482#issuecomment-1139017499.
    "apache_beam/transforms/window_test.py"

    # See https://github.com/apache/beam/issues/25390.
    "apache_beam/coders/slow_coders_test.py"
    "apache_beam/dataframe/pandas_doctests_test.py"
    "apache_beam/typehints/typed_pipeline_test.py"
    "apache_beam/coders/fast_coders_test.py"
    "apache_beam/dataframe/schemas_test.py"

    # Fails with TypeError: cannot pickle 'EncodedFile' instances
    # Upstream issue https://github.com/apache/beam/issues/33889
    "apache_beam/options/pipeline_options_validator_test.py"
    "apache_beam/yaml/main_test.py"
    "apache_beam/yaml/programming_guide_test.py"
    "apache_beam/yaml/readme_test.py"
    "apache_beam/yaml/yaml_combine_test.py"
    "apache_beam/yaml/yaml_enrichment_test.py"
    "apache_beam/yaml/yaml_io_test.py"
    "apache_beam/yaml/yaml_join_test.py"
    "apache_beam/yaml/yaml_mapping_test.py"
    "apache_beam/yaml/yaml_ml_test.py"
    "apache_beam/yaml/yaml_provider_unit_test.py"

    # FIXME All of these fail due to a single error:
    # AttributeError: 'MaybeReshuffle' object has no attribute 'side_inputs'
    # Upstream issue https://github.com/apache/beam/issues/33854
    "apache_beam/coders/row_coder_test.py"
    "apache_beam/examples/avro_nyc_trips_test.py"
    "apache_beam/examples/complete/autocomplete_test.py"
    "apache_beam/examples/complete/estimate_pi_test.py"
    "apache_beam/examples/complete/game/game_stats_test.py"
    "apache_beam/examples/complete/game/hourly_team_score_test.py"
    "apache_beam/examples/complete/game/leader_board_test.py"
    "apache_beam/examples/complete/game/user_score_test.py"
    "apache_beam/examples/complete/tfidf_test.py"
    "apache_beam/examples/complete/top_wikipedia_sessions_test.py"
    "apache_beam/examples/cookbook/bigquery_side_input_test.py"
    "apache_beam/examples/cookbook/bigquery_tornadoes_test.py"
    "apache_beam/examples/cookbook/coders_test.py"
    "apache_beam/examples/cookbook/combiners_test.py"
    "apache_beam/examples/cookbook/custom_ptransform_test.py"
    "apache_beam/examples/cookbook/filters_test.py"
    "apache_beam/examples/matrix_power_test.py"
    "apache_beam/examples/snippets/snippets_test.py"
    "apache_beam/examples/snippets/transforms/aggregation/approximatequantiles_test.py"
    "apache_beam/examples/snippets/transforms/aggregation/approximateunique_test.py"
    "apache_beam/examples/snippets/transforms/aggregation/batchelements_test.py"
    "apache_beam/examples/snippets/transforms/aggregation/cogroupbykey_test.py"
    "apache_beam/examples/snippets/transforms/aggregation/combineglobally_test.py"
    "apache_beam/examples/snippets/transforms/aggregation/combineperkey_test.py"
    "apache_beam/examples/snippets/transforms/aggregation/combinevalues_test.py"
    "apache_beam/examples/snippets/transforms/aggregation/count_test.py"
    "apache_beam/examples/snippets/transforms/aggregation/distinct_test.py"
    "apache_beam/examples/snippets/transforms/aggregation/groupbykey_test.py"
    "apache_beam/examples/snippets/transforms/aggregation/groupintobatches_test.py"
    "apache_beam/examples/snippets/transforms/aggregation/latest_test.py"
    "apache_beam/examples/snippets/transforms/aggregation/max_test.py"
    "apache_beam/examples/snippets/transforms/aggregation/mean_test.py"
    "apache_beam/examples/snippets/transforms/aggregation/min_test.py"
    "apache_beam/examples/snippets/transforms/aggregation/sample_test.py"
    "apache_beam/examples/snippets/transforms/aggregation/sum_test.py"
    "apache_beam/examples/snippets/transforms/aggregation/tolist_test.py"
    "apache_beam/examples/snippets/transforms/aggregation/top_test.py"
    "apache_beam/examples/snippets/transforms/elementwise/filter_test.py"
    "apache_beam/examples/snippets/transforms/elementwise/flatmap_test.py"
    "apache_beam/examples/snippets/transforms/elementwise/keys_test.py"
    "apache_beam/examples/snippets/transforms/elementwise/kvswap_test.py"
    "apache_beam/examples/snippets/transforms/elementwise/map_test.py"
    "apache_beam/examples/snippets/transforms/elementwise/pardo_test.py"
    "apache_beam/examples/snippets/transforms/elementwise/partition_test.py"
    "apache_beam/examples/snippets/transforms/elementwise/regex_test.py"
    "apache_beam/examples/snippets/transforms/elementwise/tostring_test.py"
    "apache_beam/examples/snippets/transforms/elementwise/values_test.py"
    "apache_beam/examples/snippets/transforms/elementwise/withtimestamps_test.py"
    "apache_beam/examples/snippets/transforms/other/create_test.py"
    "apache_beam/examples/snippets/transforms/other/flatten_test.py"
    "apache_beam/examples/snippets/transforms/other/window_test.py"
    "apache_beam/examples/snippets/util_test.py"
    "apache_beam/io/avroio_test.py"
    "apache_beam/io/concat_source_test.py"
    "apache_beam/io/filebasedsink_test.py"
    "apache_beam/io/filebasedsource_test.py"
    "apache_beam/io/fileio_test.py"
    "apache_beam/io/mongodbio_test.py"
    "apache_beam/io/parquetio_test.py"
    "apache_beam/io/sources_test.py"
    "apache_beam/io/textio_test.py"
    "apache_beam/io/tfrecordio_test.py"
    "apache_beam/metrics/metric_test.py"
    "apache_beam/ml/inference/base_test.py"
    "apache_beam/ml/inference/sklearn_inference_test.py"
    "apache_beam/ml/inference/utils_test.py"
    "apache_beam/ml/rag/chunking/base_test.py"
    "apache_beam/ml/rag/ingestion/base_test.py"
    "apache_beam/pipeline_test.py"
    "apache_beam/runners/direct/direct_runner_test.py"
    "apache_beam/runners/direct/sdf_direct_runner_test.py"
    "apache_beam/runners/interactive/interactive_beam_test.py"
    "apache_beam/runners/interactive/interactive_runner_test.py"
    "apache_beam/runners/interactive/non_interactive_runner_test.py"
    "apache_beam/runners/interactive/recording_manager_test.py"
    "apache_beam/runners/portability/fn_api_runner/translations_test.py"
    "apache_beam/runners/portability/fn_api_runner/trigger_manager_test.py"
    "apache_beam/runners/portability/stager_test.py"
    "apache_beam/testing/synthetic_pipeline_test.py"
    "apache_beam/testing/test_stream_test.py"
    "apache_beam/testing/util_test.py"
    "apache_beam/transforms/combiners_test.py"
    "apache_beam/transforms/core_test.py"
    "apache_beam/transforms/create_test.py"
    "apache_beam/transforms/deduplicate_test.py"
    "apache_beam/transforms/periodicsequence_test.py"
    "apache_beam/transforms/ptransform_test.py"
    "apache_beam/transforms/sideinputs_test.py"
    "apache_beam/transforms/stats_test.py"
    "apache_beam/transforms/transforms_keyword_only_args_test.py"
    "apache_beam/transforms/trigger_test.py"
    "apache_beam/transforms/userstate_test.py"
    "apache_beam/transforms/util_test.py"
    "apache_beam/transforms/write_ptransform_test.py"

    # FIXME AttributeError: 'Namespace' object has no attribute 'test_pipeline_options'
    # Upstream issue https://github.com/apache/beam/issues/33853
    "apache_beam/runners/portability/prism_runner_test.py"

    # FIXME ValueError: Unable to run pipeline with requirement: unsupported_requirement
    # Upstream issue https://github.com/apache/beam/issues/33853
    "apache_beam/yaml/yaml_transform_scope_test.py"
    "apache_beam/yaml/yaml_transform_test.py"
    "apache_beam/yaml/yaml_transform_unit_test.py"
    "apache_beam/yaml/yaml_udf_test.py"
    "apache_beam/dataframe/frames_test.py"

    # FIXME Those tests do not terminate due to a grpc error (threading issue)
    # grpc_status:14, grpc_message:"Cancelling all calls"}"
    # Upstream issue https://github.com/apache/beam/issues/33851
    "apache_beam/runners/portability/portable_runner_test.py"
  ]
  ++ lib.optionals (pythonAtLeast "3.13") [
    # >       instruction = ofs_table[pc]
    # E       KeyError: 18
    "apache_beam/typehints/trivial_inference_test.py"
  ];

  disabledTests = [
    # IndexError: list index out of range
    "test_only_sample_exceptions"

    # AssertionError: False is not true
    "test_samples_all_with_both_experiments"
  ]
  ++ lib.optionals stdenv.hostPlatform.isDarwin [
    # PermissionError: [Errno 13] Permission denied: '/tmp/...'
    "test_cache_manager_uses_local_ib_cache_root"
    "test_describe_all_recordings"
    "test_find_out_correct_user_pipeline"
    "test_get_cache_manager_creates_cache_manager_if_absent"
    "test_streaming_cache_uses_local_ib_cache_root"
    "test_track_user_pipeline_cleanup_non_inspectable_pipeline"
  ]
  ++ lib.optionals (pythonAtLeast "3.12") [
    # TypeError: Could not determine schema for type hint Any.
    "test_batching_beam_row_input"
    "test_auto_convert"
    "test_unbatching_series"
    "test_batching_beam_row_to_dataframe"

    # AssertionError: Any != <class 'int'>
    "test_pycallable_map"
    "testAlwaysReturnsEarly"

    # TypeError: Expected Iterator in return type annotation
    "test_get_output_batch_type"
  ];

  meta = {
    description = "Unified model for defining both batch and streaming data-parallel processing pipelines";
    homepage = "https://beam.apache.org/";
    # src.tag already carries the "v" prefix ("v${version}"), so it is used
    # directly as the git ref; prefixing it with "release-" would yield the
    # ref "release-v<version>" rather than the fetched tag.
    changelog = "https://github.com/apache/beam/blob/${src.tag}/CHANGES.md";
    license = lib.licenses.asl20;
    maintainers = with lib.maintainers; [ ndl ];
  };
}