# Python package derivation for the Apache Beam Python SDK.
#
# The SDK lives in the `sdks/python` subdirectory of the upstream
# repository, so the build is rooted there (see `sourceRoot`). Tests are
# executed against the installed copy of the package (see `preCheck`).
{
  buildPythonPackage,
  cloudpickle,
  crcmod,
  cython,
  dill,
  fastavro,
  fasteners,
  fetchFromGitHub,
  fetchpatch,
  freezegun,
  grpcio,
  grpcio-tools,
  hdfs,
  httplib2,
  hypothesis,
  lib,
  mock,
  mypy-protobuf,
  numpy,
  objsize,
  orjson,
  pandas,
  parameterized,
  proto-plus,
  protobuf,
  psycopg2,
  pyarrow,
  pydot,
  pyhamcrest,
  pymongo,
  pytest-xdist,
  pytestCheckHook,
  python,
  python-dateutil,
  pythonRelaxDepsHook,
  pytz,
  pyyaml,
  regex,
  requests,
  requests-mock,
  scikit-learn,
  setuptools,
  sqlalchemy,
  tenacity,
  testcontainers,
  typing-extensions,
  zstandard,
}:

buildPythonPackage rec {
  pname = "apache-beam";
  version = "2.54.0";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "apache";
    repo = "beam";
    rev = "refs/tags/v${version}";
    hash = "sha256-DcqYBPAS+yUqTJLUem8+2OqRUzb6DoBOeRkMjmvuvws=";
  };

  patches = [
    (fetchpatch {
      # https://github.com/apache/beam/pull/24143
      name = "fix-for-dill-0.3.6.patch";
      url = "https://github.com/apache/beam/commit/7e014435b816015d21cc07f3f6c80809f3d8023d.patch";
      hash = "sha256-iUmnzrItTFM98w3mpadzrmtI3t0fucpSujAg/6qxCGk=";
      # The patch paths are relative to the repository root, while the build
      # runs from `sdks/python` (see `sourceRoot`), hence the extra
      # path components to strip.
      stripLen = 2;
    })
  ];

  # Version constraints relaxed by pythonRelaxDepsHook.
  pythonRelaxDeps = [
    # See https://github.com/NixOS/nixpkgs/issues/156957
    "dill"
    "numpy"
    "pymongo"

    # See https://github.com/NixOS/nixpkgs/issues/193613
    "protobuf"

    # As of apache-beam v2.45.0, the requirement is httplib2>=0.8,<0.21.0, but
    # the current (2023-02-08) nixpkgs's httplib2 version is 0.21.0. This can be
    # removed once beam is upgraded since the current requirement on master is
    # for httplib2>=0.8,<0.22.0.
    "httplib2"

    # As of apache-beam v2.45.0, the requirement is pyarrow<10.0.0,>=0.15.1, but
    # the current (2023-02-22) nixpkgs's pyarrow version is 11.0.0.
    "pyarrow"
  ];

  # The Python SDK is a subdirectory of the upstream monorepo.
  sourceRoot = "${src.name}/sdks/python";

  nativeBuildInputs = [
    cython
    grpcio-tools
    mypy-protobuf
    pythonRelaxDepsHook
    setuptools
  ];

  propagatedBuildInputs = [
    cloudpickle
    crcmod
    dill
    fastavro
    fasteners
    grpcio
    hdfs
    httplib2
    numpy
    objsize
    orjson
    proto-plus
    protobuf
    pyarrow
    pydot
    pymongo
    python-dateutil
    pytz
    regex
    requests
    typing-extensions
    zstandard
  ];

  enableParallelBuilding = true;

  pythonImportsCheck = [ "apache_beam" ];

  # Test dependencies are declared as *native* check inputs: setup hooks such
  # as pytestCheckHook are only guaranteed to be sourced from native inputs
  # (e.g. when strictDeps is enabled), otherwise the tests may silently not run.
  nativeCheckInputs = [
    freezegun
    hypothesis
    mock
    pandas
    parameterized
    psycopg2
    pyhamcrest
    pytestCheckHook
    pytest-xdist
    pyyaml
    requests-mock
    scikit-learn
    sqlalchemy
    tenacity
    testcontainers
  ];

  # Make sure we're running the tests for the actually installed
  # package, so that cython's .so files are available.
  preCheck = "cd $out/${python.sitePackages}";

  disabledTestPaths = [
    # Fails with
    #     _______ ERROR collecting apache_beam/io/external/xlang_jdbcio_it_test.py _______
    #     apache_beam/io/external/xlang_jdbcio_it_test.py:80: in <module>
    #         class CrossLanguageJdbcIOTest(unittest.TestCase):
    #     apache_beam/io/external/xlang_jdbcio_it_test.py:99: in CrossLanguageJdbcIOTest
    #         container_init: Callable[[], Union[PostgresContainer, MySqlContainer]],
    #     E   NameError: name 'MySqlContainer' is not defined
    #
    "apache_beam/io/external/xlang_jdbcio_it_test.py"

    # These tests depend on the availability of specific server backends.
    "apache_beam/runners/portability/flink_runner_test.py"
    "apache_beam/runners/portability/samza_runner_test.py"
    "apache_beam/runners/portability/spark_runner_test.py"

    # Fails starting from dill 0.3.6 because it tries to pickle pytest globals:
    # https://github.com/uqfoundation/dill/issues/482#issuecomment-1139017499.
    "apache_beam/transforms/window_test.py"

    # See https://github.com/apache/beam/issues/25390.
    "apache_beam/coders/slow_coders_test.py"
    "apache_beam/dataframe/pandas_doctests_test.py"
    "apache_beam/typehints/typed_pipeline_test.py"
    "apache_beam/coders/fast_coders_test.py"
    "apache_beam/dataframe/schemas_test.py"
  ];

  disabledTests = [
    # The reasons for these test failures are unclear.
    # They reproduce in Docker with Ubuntu 22.04
    # (= they're not `nixpkgs`-specific) but given that upstream uses
    # quite elaborate testing infra with containers and multiple
    # different runners - I don't expect them to help debugging these
    # when running via our (= custom from their PoV) testing infra.
    "test_with_main_session"
    # AssertionErrors
    "test_unified_repr"
    "testDictComprehension"
    "testDictComprehensionSimple"
    "testGenerator"
    "testGeneratorComprehension"
    "testListComprehension"
    "testNoneReturn"
    "testSet"
    "testTupleListComprehension"
    "test_newtype"
    "test_pardo_type_inference"
    "test_get_output_batch_type"
    "test_pformat_namedtuple_with_unnamed_fields"
    "test_row_coder_fail_early_bad_schema"
    # See https://github.com/apache/beam/issues/26004.
    "test_batch_encode_decode"
  ];

  meta = {
    description = "Unified model for defining both batch and streaming data-parallel processing pipelines";
    homepage = "https://beam.apache.org/";
    license = lib.licenses.asl20;
    maintainers = with lib.maintainers; [ ndl ];
    # Marked broken when the pandas in scope is version 2 or newer:
    # https://github.com/apache/beam/issues/27221
    broken = lib.versionAtLeast pandas.version "2";
  };
}