1{ buildPythonPackage
2, cloudpickle
3, crcmod
4, cython
5, dill
6, fastavro
7, fasteners
8, fetchFromGitHub
9, fetchpatch
10, freezegun
11, grpcio
12, grpcio-tools
13, hdfs
14, httplib2
15, hypothesis
16, lib
17, mock
18, mypy-protobuf
19, numpy
20, objsize
21, orjson
22, pandas
23, parameterized
24, proto-plus
25, protobuf
26, psycopg2
27, pyarrow
28, pydot
29, pyhamcrest
30, pymongo
31, pytest-xdist
32, pytestCheckHook
33, python
34, python-dateutil
35, pythonRelaxDepsHook
36, pytz
37, pyyaml
38, regex
39, requests
40, requests-mock
41, scikit-learn
42, sqlalchemy
43, tenacity
44, testcontainers
45, typing-extensions
46, zstandard
47}:
48
49buildPythonPackage rec {
  # Package identity; `version` also selects the upstream git tag fetched below.
  pname = "apache-beam";
  version = "2.50.0";

  # Sources are fetched from the apache/beam GitHub repository at tag
  # `v${version}`. `hash` pins the fetched content and has to be updated
  # whenever `version` changes.
  src = fetchFromGitHub {
    owner = "apache";
    repo = "beam";
    rev = "refs/tags/v${version}";
    hash = "sha256-qaxYWPVdMlegvH/W66UBoQbcQ5Ac/3DNoQs8xo+KfLc=";
  };
59
  # Patches applied on top of the tagged upstream sources.
  patches = [
    (fetchpatch {
      # https://github.com/apache/beam/pull/24143
      name = "fix-for-dill-0.3.6.patch";
      url = "https://github.com/apache/beam/commit/7e014435b816015d21cc07f3f6c80809f3d8023d.patch";
      hash = "sha256-iUmnzrItTFM98w3mpadzrmtI3t0fucpSujAg/6qxCGk=";
      # Drops two leading path components from the patch; presumably needed
      # because the commit's paths are relative to the repository root while
      # the build runs inside `sdks/python` (see `sourceRoot`) — confirm when
      # touching this patch.
      stripLen = 2;
    })
  ];
69
  # Dependencies whose version bounds in Beam's requirements are relaxed by
  # `pythonRelaxDepsHook` (listed in `nativeBuildInputs`).
  # NOTE(review): the httplib2 and pyarrow notes below were written against
  # apache-beam v2.45.0; with the package now at 2.50.0 they may be stale —
  # re-check which relaxations are still needed.
  pythonRelaxDeps = [
    # See https://github.com/NixOS/nixpkgs/issues/156957
    "dill"
    "numpy"
    "pymongo"

    # See https://github.com/NixOS/nixpkgs/issues/193613
    "protobuf"

    # As of apache-beam v2.45.0, the requirement is httplib2>=0.8,<0.21.0, but
    # the nixpkgs httplib2 version at the time (2023-02-08) was 0.21.0. This
    # can be removed once beam is upgraded, since the requirement on master
    # is httplib2>=0.8,<0.22.0.
    "httplib2"

    # As of apache-beam v2.45.0, the requirement is pyarrow<10.0.0,>=0.15.1, but
    # the nixpkgs pyarrow version at the time (2023-02-22) was 11.0.0.
    "pyarrow"
  ];
89
  # Build from the `sdks/python` subdirectory of the fetched repository
  # rather than from its root.
  sourceRoot = "${src.name}/sdks/python";

  # Build-time tooling. `pythonRelaxDepsHook` is the hook that consumes the
  # `pythonRelaxDeps` list; cython produces the compiled `.so` modules that
  # the `preCheck` comment refers to.
  # NOTE(review): grpcio-tools and mypy-protobuf are presumably used to
  # generate protobuf/gRPC stubs during the build — confirm.
  nativeBuildInputs = [
    cython
    grpcio-tools
    mypy-protobuf
    pythonRelaxDepsHook
  ];
98
  # Runtime Python dependencies; they are propagated to every package or
  # environment that depends on apache-beam. Kept in alphabetical order.
  propagatedBuildInputs = [
    cloudpickle
    crcmod
    dill
    fastavro
    fasteners
    grpcio
    hdfs
    httplib2
    numpy
    objsize
    orjson
    proto-plus
    protobuf
    pyarrow
    pydot
    pymongo
    python-dateutil
    pytz
    regex
    requests
    typing-extensions
    zstandard
  ];
123
124 enableParallelBuilding = true;
125
126 pythonImportsCheck = [
127 "apache_beam"
128 ];
129
130 checkInputs = [
131 freezegun
132 hypothesis
133 mock
134 pandas
135 parameterized
136 psycopg2
137 pyhamcrest
138 pytestCheckHook
139 pytest-xdist
140 pyyaml
141 requests-mock
142 scikit-learn
143 sqlalchemy
144 tenacity
145 testcontainers
146 ];
147
  # Make sure we're running the tests against the actually installed
  # package, so that cython's .so files are available.
  # NOTE(review): `python.sitePackages` would likely express the same
  # directory without hard-coding the `lib/.../site-packages` layout —
  # confirm before changing, since the string is part of the derivation.
  preCheck = "cd $out/lib/${python.libPrefix}/site-packages";
151
  # Whole test modules that are skipped; paths are relative to the
  # site-packages directory entered in `preCheck`.
  disabledTestPaths = [
    # Fails with
    # _______ ERROR collecting apache_beam/io/external/xlang_jdbcio_it_test.py _______
    # apache_beam/io/external/xlang_jdbcio_it_test.py:80: in <module>
    #     class CrossLanguageJdbcIOTest(unittest.TestCase):
    # apache_beam/io/external/xlang_jdbcio_it_test.py:99: in CrossLanguageJdbcIOTest
    #     container_init: Callable[[], Union[PostgresContainer, MySqlContainer]],
    # E   NameError: name 'MySqlContainer' is not defined
    #
    "apache_beam/io/external/xlang_jdbcio_it_test.py"

    # These tests depend on the availability of specific server backends.
    "apache_beam/runners/portability/flink_runner_test.py"
    "apache_beam/runners/portability/samza_runner_test.py"
    "apache_beam/runners/portability/spark_runner_test.py"

    # Fails starting from dill 0.3.6 because it tries to pickle pytest globals:
    # https://github.com/uqfoundation/dill/issues/482#issuecomment-1139017499.
    "apache_beam/transforms/window_test.py"

    # See https://github.com/apache/beam/issues/25390.
    "apache_beam/coders/slow_coders_test.py"
    "apache_beam/dataframe/pandas_doctests_test.py"
    "apache_beam/typehints/typed_pipeline_test.py"
    "apache_beam/coders/fast_coders_test.py"
    "apache_beam/dataframe/schemas_test.py"
  ];
179
  # Individual test names that are skipped.
  disabledTests = [
    # The reasons for these failures are unclear.
    # They also reproduce in Docker with Ubuntu 22.04
    # (= they're not `nixpkgs`-specific), but given that upstream uses
    # quite elaborate testing infra with containers and multiple
    # different runners, I don't expect them to help debug failures
    # that show up under our (= custom from their PoV) testing infra.
    "test_with_main_session"
    # AssertionErrors
    "test_unified_repr"
    "testDictComprehension"
    "testDictComprehensionSimple"
    "testGenerator"
    "testGeneratorComprehension"
    "testListComprehension"
    "testNoneReturn"
    "testSet"
    "testTupleListComprehension"
    "test_newtype"
    "test_pardo_type_inference"
    "test_get_output_batch_type"
    "test_pformat_namedtuple_with_unnamed_fields"
    "test_row_coder_fail_early_bad_schema"
    # See https://github.com/apache/beam/issues/26004.
    "test_batch_encode_decode"
  ];
206
207 meta = with lib; {
208 description = "Unified model for defining both batch and streaming data-parallel processing pipelines";
209 homepage = "https://beam.apache.org/";
210 license = licenses.asl20;
211 maintainers = with maintainers; [ ndl ];
212 # https://github.com/apache/beam/issues/27221
213 broken = lib.versionAtLeast pandas.version "2";
214 };
215}