# Nix expression for the Apache Beam Python SDK.
#
# Fetches the upstream monorepo from GitHub, applies a dill 0.3.6
# compatibility patch, and builds only the `sdks/python` subdirectory.
# The test suite is run via pytestCheckHook against the installed copy
# of the package (see `preCheck`).
{
  buildPythonPackage,
  cloudpickle,
  crcmod,
  cython,
  dill,
  fastavro,
  fetchFromGitHub,
  fetchpatch,
  freezegun,
  grpcio,
  grpcio-tools,
  hdfs,
  httplib2,
  lib,
  mock,
  mypy-protobuf,
  numpy,
  oauth2client,
  orjson,
  pandas,
  parameterized,
  proto-plus,
  protobuf,
  psycopg2,
  pyarrow,
  pydot,
  pyhamcrest,
  pymongo,
  pytestCheckHook,
  python,
  python-dateutil,
  pythonAtLeast,
  pythonRelaxDepsHook,
  pytz,
  pyyaml,
  requests,
  requests-mock,
  scikit-learn,
  setuptools,
  sqlalchemy,
  tenacity,
  testcontainers,
  typing-extensions,
}:

buildPythonPackage rec {
  pname = "apache-beam";
  version = "2.40.0";

  src = fetchFromGitHub {
    owner = "apache";
    repo = "beam";
    rev = "v${version}";
    # SRI hash, passed via `hash` for consistency with the fetchpatch below.
    hash = "sha256-0S7Dj6PMSbZkEAY6ZLUpKVfe/tFxsq60TTAFj0Qhtv0=";
  };

  patches = [
    (fetchpatch {
      # https://github.com/apache/beam/pull/24143
      name = "fix-for-dill-0.3.6.patch";
      url = "https://github.com/apache/beam/commit/7e014435b816015d21cc07f3f6c80809f3d8023d.patch";
      hash = "sha256-iUmnzrItTFM98w3mpadzrmtI3t0fucpSujAg/6qxCGk=";
      # Two leading path components are stripped from the patch paths so
      # they line up with `sourceRoot` (the upstream commit is relative to
      # the repository root, the build runs inside `sdks/python`).
      stripLen = 2;
    })
  ];

  # Dependencies whose version constraints are relaxed by pythonRelaxDepsHook.
  pythonRelaxDeps = [
    # See https://github.com/NixOS/nixpkgs/issues/156957
    "dill"
    "numpy"
    "pyarrow"
    "pymongo"

    # See https://github.com/NixOS/nixpkgs/issues/193613
    "protobuf"
  ];

  # Only the Python SDK subdirectory of the monorepo is built.
  sourceRoot = "source/sdks/python";

  nativeBuildInputs = [
    cython
    grpcio-tools
    mypy-protobuf
    pythonRelaxDepsHook
  ];

  propagatedBuildInputs = [
    cloudpickle
    crcmod
    cython
    dill
    fastavro
    grpcio
    hdfs
    httplib2
    numpy
    oauth2client
    orjson
    proto-plus
    protobuf
    pyarrow
    pydot
    pymongo
    python-dateutil
    pytz
    requests
    setuptools
    typing-extensions
  ];

  enableParallelBuilding = true;

  pythonImportsCheck = [
    "apache_beam"
  ];

  checkInputs = [
    freezegun
    mock
    pandas
    parameterized
    psycopg2
    pyhamcrest
    pytestCheckHook
    pyyaml
    requests-mock
    scikit-learn
    sqlalchemy
    tenacity
    testcontainers
  ];

  # Make sure we're running the tests for the actually installed
  # package, so that cython's .so files are available.
  preCheck = "cd $out/lib/${python.libPrefix}/site-packages";

  disabledTestPaths = [
    # Fails with
    #     _______ ERROR collecting apache_beam/io/external/xlang_jdbcio_it_test.py _______
    #     apache_beam/io/external/xlang_jdbcio_it_test.py:80: in <module>
    #         class CrossLanguageJdbcIOTest(unittest.TestCase):
    #     apache_beam/io/external/xlang_jdbcio_it_test.py:99: in CrossLanguageJdbcIOTest
    #         container_init: Callable[[], Union[PostgresContainer, MySqlContainer]],
    #     E   NameError: name 'MySqlContainer' is not defined
    #
    "apache_beam/io/external/xlang_jdbcio_it_test.py"

    # These tests depend on the availability of specific server backends.
    "apache_beam/runners/portability/flink_runner_test.py"
    "apache_beam/runners/portability/samza_runner_test.py"
    "apache_beam/runners/portability/spark_runner_test.py"

    # Fails starting from dill 0.3.6 because it tries to pickle pytest globals:
    # https://github.com/uqfoundation/dill/issues/482#issuecomment-1139017499.
    "apache_beam/transforms/window_test.py"
  ];

  disabledTests = [
    # The reasons for these test failures are unclear.
    # They reproduce in Docker with Ubuntu 22.04
    # (= they're not `nixpkgs`-specific), but given that upstream uses
    # quite elaborate testing infra with containers and multiple
    # different runners, I don't expect them to help debug these
    # when running via our (= custom from their PoV) testing infra.
    "test_with_main_session"
    # AssertionErrors
    "test_unified_repr"
    "testDictComprehension"
    "testDictComprehensionSimple"
    "testGenerator"
    "testGeneratorComprehension"
    "testListComprehension"
    "testNoneReturn"
    "testSet"
    "testTupleListComprehension"
    "test_newtype"
    "test_pardo_type_inference"
    "test_get_output_batch_type"
    "test_pformat_namedtuple_with_unnamed_fields"
    "test_row_coder_fail_early_bad_schema"
  ];

  meta = with lib; {
    description = "Unified model for defining both batch and streaming data-parallel processing pipelines";
    homepage = "https://beam.apache.org/";
    license = licenses.asl20;
    maintainers = with maintainers; [ ndl ];
  };
}