# Package definition for the Apache Beam Python SDK.
#
# The sources are fetched from the apache/beam GitHub repository at the tag
# matching `version`; only the `sdks/python` subdirectory is built (see
# `sourceRoot`). All arguments are supplied by the enclosing Python package set.
{ buildPythonPackage
, cloudpickle
, crcmod
, cython
, dill
, fastavro
, fasteners
, fetchFromGitHub
, fetchpatch
, freezegun
, grpcio
, grpcio-tools
, hdfs
, httplib2
, hypothesis
, lib
, mock
, mypy-protobuf
, numpy
, objsize
, orjson
, pandas
, parameterized
, proto-plus
, protobuf
, psycopg2
, pyarrow
, pydot
, pyhamcrest
, pymongo
, pytest-xdist
, pytestCheckHook
, python
, python-dateutil
, pythonRelaxDepsHook
, pytz
, pyyaml
, regex
, requests
, requests-mock
, scikit-learn
, sqlalchemy
, tenacity
, testcontainers
, typing-extensions
, zstandard
}:

buildPythonPackage rec {
  pname = "apache-beam";
  version = "2.50.0";

  src = fetchFromGitHub {
    owner = "apache";
    repo = "beam";
    rev = "refs/tags/v${version}";
    hash = "sha256-qaxYWPVdMlegvH/W66UBoQbcQ5Ac/3DNoQs8xo+KfLc=";
  };

  patches = [
    (fetchpatch {
      # https://github.com/apache/beam/pull/24143
      name = "fix-for-dill-0.3.6.patch";
      url = "https://github.com/apache/beam/commit/7e014435b816015d21cc07f3f6c80809f3d8023d.patch";
      hash = "sha256-iUmnzrItTFM98w3mpadzrmtI3t0fucpSujAg/6qxCGk=";
      # The patch paths are relative to the repository root, while the build
      # runs inside `sdks/python` (see `sourceRoot`), hence the extra
      # stripped components.
      stripLen = 2;
    })
  ];

  # Upstream version bounds that are relaxed by pythonRelaxDepsHook.
  pythonRelaxDeps = [
    # See https://github.com/NixOS/nixpkgs/issues/156957
    "dill"
    "numpy"
    "pymongo"

    # See https://github.com/NixOS/nixpkgs/issues/193613
    "protobuf"

    # As of apache-beam v2.45.0, the requirement is httplib2>=0.8,<0.21.0, but
    # the current (2023-02-08) nixpkgs's httplib2 version is 0.21.0. This can be
    # removed once beam is upgraded since the current requirement on master is
    # for httplib2>=0.8,<0.22.0.
    "httplib2"

    # As of apache-beam v2.45.0, the requirement is pyarrow<10.0.0,>=0.15.1, but
    # the current (2023-02-22) nixpkgs's pyarrow version is 11.0.0.
    "pyarrow"
  ];

  sourceRoot = "${src.name}/sdks/python";

  nativeBuildInputs = [
    cython
    grpcio-tools
    mypy-protobuf
    pythonRelaxDepsHook
  ];

  propagatedBuildInputs = [
    cloudpickle
    crcmod
    dill
    fastavro
    fasteners
    grpcio
    hdfs
    httplib2
    numpy
    objsize
    orjson
    proto-plus
    protobuf
    pyarrow
    pydot
    pymongo
    python-dateutil
    pytz
    regex
    requests
    typing-extensions
    zstandard
  ];

  enableParallelBuilding = true;

  pythonImportsCheck = [
    "apache_beam"
  ];

  # Test tooling (pytestCheckHook, pytest-xdist, ...) and test-only libraries
  # are executed on the build platform during the check phase, so they are
  # listed in `nativeCheckInputs` rather than `checkInputs`.
  nativeCheckInputs = [
    freezegun
    hypothesis
    mock
    pandas
    parameterized
    psycopg2
    pyhamcrest
    pytestCheckHook
    pytest-xdist
    pyyaml
    requests-mock
    scikit-learn
    sqlalchemy
    tenacity
    testcontainers
  ];

  # Make sure we're running the tests for the actually installed
  # package, so that cython's .so files are available.
  # `python.sitePackages` is the interpreter's own notion of the
  # site-packages path relative to the output prefix.
  preCheck = "cd $out/${python.sitePackages}";

  disabledTestPaths = [
    # Fails with
    #     _______ ERROR collecting apache_beam/io/external/xlang_jdbcio_it_test.py _______
    #     apache_beam/io/external/xlang_jdbcio_it_test.py:80: in <module>
    #         class CrossLanguageJdbcIOTest(unittest.TestCase):
    #     apache_beam/io/external/xlang_jdbcio_it_test.py:99: in CrossLanguageJdbcIOTest
    #         container_init: Callable[[], Union[PostgresContainer, MySqlContainer]],
    #     E   NameError: name 'MySqlContainer' is not defined
    #
    "apache_beam/io/external/xlang_jdbcio_it_test.py"

    # These tests depend on the availability of specific server backends.
    "apache_beam/runners/portability/flink_runner_test.py"
    "apache_beam/runners/portability/samza_runner_test.py"
    "apache_beam/runners/portability/spark_runner_test.py"

    # Fails starting from dill 0.3.6 because it tries to pickle pytest globals:
    # https://github.com/uqfoundation/dill/issues/482#issuecomment-1139017499.
    "apache_beam/transforms/window_test.py"

    # See https://github.com/apache/beam/issues/25390.
    "apache_beam/coders/slow_coders_test.py"
    "apache_beam/dataframe/pandas_doctests_test.py"
    "apache_beam/typehints/typed_pipeline_test.py"
    "apache_beam/coders/fast_coders_test.py"
    "apache_beam/dataframe/schemas_test.py"
  ];

  disabledTests = [
    # The reasons for these test failures are unclear.
    # They reproduce in Docker with Ubuntu 22.04
    # (= they're not `nixpkgs`-specific) but given the upstream uses
    # quite elaborate testing infra with containers and multiple
    # different runners - I don't expect them to help debugging these
    # when running via our (= custom from their PoV) testing infra.
    "test_with_main_session"
    # AssertionErrors
    "test_unified_repr"
    "testDictComprehension"
    "testDictComprehensionSimple"
    "testGenerator"
    "testGeneratorComprehension"
    "testListComprehension"
    "testNoneReturn"
    "testSet"
    "testTupleListComprehension"
    "test_newtype"
    "test_pardo_type_inference"
    "test_get_output_batch_type"
    "test_pformat_namedtuple_with_unnamed_fields"
    "test_row_coder_fail_early_bad_schema"
    # See https://github.com/apache/beam/issues/26004.
    "test_batch_encode_decode"
  ];

  meta = with lib; {
    description = "Unified model for defining both batch and streaming data-parallel processing pipelines";
    homepage = "https://beam.apache.org/";
    license = licenses.asl20;
    maintainers = with maintainers; [ ndl ];
    # Marked broken when the package set's pandas is version 2 or newer:
    # https://github.com/apache/beam/issues/27221
    broken = lib.versionAtLeast pandas.version "2";
  };
}