1{ buildPythonPackage
2, cloudpickle
3, crcmod
4, cython
5, dill
6, fastavro
7, fetchFromGitHub
8, fetchpatch
9, freezegun
10, grpcio
11, grpcio-tools
12, hdfs
13, httplib2
14, lib
15, mock
16, mypy-protobuf
17, numpy
18, oauth2client
19, orjson
20, pandas
21, parameterized
22, proto-plus
23, protobuf
24, psycopg2
25, pyarrow
26, pydot
27, pyhamcrest
28, pymongo
29, pytestCheckHook
30, python
31, python-dateutil
32, pythonAtLeast
33, pythonRelaxDepsHook
34, pytz
35, pyyaml
36, requests
37, requests-mock
38, scikit-learn
39, setuptools
40, sqlalchemy
41, tenacity
42, testcontainers
43, typing-extensions
44}:
45
# Apache Beam Python SDK, built from the `sdks/python` subdirectory of the
# upstream monorepo. Cython extensions are compiled at build time and the test
# suite is run against the installed copy of the package.
buildPythonPackage rec {
  pname = "apache-beam";
  version = "2.40.0";

  src = fetchFromGitHub {
    owner = "apache";
    repo = "beam";
    rev = "v${version}";
    hash = "sha256-0S7Dj6PMSbZkEAY6ZLUpKVfe/tFxsq60TTAFj0Qhtv0=";
  };

  patches = [
    (fetchpatch {
      # https://github.com/apache/beam/pull/24143
      name = "fix-for-dill-0.3.6.patch";
      url = "https://github.com/apache/beam/commit/7e014435b816015d21cc07f3f6c80809f3d8023d.patch";
      hash = "sha256-iUmnzrItTFM98w3mpadzrmtI3t0fucpSujAg/6qxCGk=";
      # The patch is generated against the repository root, while we build
      # from `sdks/python` (see `sourceRoot`), so drop those two extra
      # leading path components.
      stripLen = 2;
    })
  ];

  pythonRelaxDeps = [
    # See https://github.com/NixOS/nixpkgs/issues/156957
    "dill"
    "numpy"
    "pyarrow"
    "pymongo"

    # See https://github.com/NixOS/nixpkgs/issues/193613
    "protobuf"
  ];

  # The Python SDK lives in a subdirectory of the fetched source tree.
  sourceRoot = "source/sdks/python";

  nativeBuildInputs = [
    cython
    grpcio-tools
    mypy-protobuf
    pythonRelaxDepsHook
  ];

  propagatedBuildInputs = [
    cloudpickle
    crcmod
    cython
    dill
    fastavro
    grpcio
    hdfs
    httplib2
    numpy
    oauth2client
    orjson
    proto-plus
    protobuf
    pyarrow
    pydot
    pymongo
    python-dateutil
    pytz
    requests
    setuptools
    typing-extensions
  ];

  enableParallelBuilding = true;

  pythonImportsCheck = [
    "apache_beam"
  ];

  # NOTE(review): newer nixpkgs revisions expect test-time tools and hooks in
  # `nativeCheckInputs`; confirm against the nixpkgs revision this file
  # targets before renaming.
  checkInputs = [
    freezegun
    mock
    pandas
    parameterized
    psycopg2
    pyhamcrest
    pytestCheckHook
    pyyaml
    requests-mock
    scikit-learn
    sqlalchemy
    tenacity
    testcontainers
  ];

  # Make sure we're running the tests for the actually installed
  # package, so that cython's .so files are available.
  preCheck = "cd $out/${python.sitePackages}";

  disabledTestPaths = [
    # Fails with
    #     _______ ERROR collecting apache_beam/io/external/xlang_jdbcio_it_test.py _______
    #     apache_beam/io/external/xlang_jdbcio_it_test.py:80: in <module>
    #         class CrossLanguageJdbcIOTest(unittest.TestCase):
    #     apache_beam/io/external/xlang_jdbcio_it_test.py:99: in CrossLanguageJdbcIOTest
    #         container_init: Callable[[], Union[PostgresContainer, MySqlContainer]],
    #     E   NameError: name 'MySqlContainer' is not defined
    #
    "apache_beam/io/external/xlang_jdbcio_it_test.py"

    # These tests depend on the availability of specific server backends.
    "apache_beam/runners/portability/flink_runner_test.py"
    "apache_beam/runners/portability/samza_runner_test.py"
    "apache_beam/runners/portability/spark_runner_test.py"

    # Fails starting from dill 0.3.6 because it tries to pickle pytest globals:
    # https://github.com/uqfoundation/dill/issues/482#issuecomment-1139017499.
    "apache_beam/transforms/window_test.py"
  ];

  disabledTests = [
    # The reasons for these test failures are unclear.
    # They reproduce in Docker with Ubuntu 22.04
    # (= they're not `nixpkgs`-specific), but given that upstream uses
    # quite elaborate testing infra with containers and multiple
    # different runners, I don't expect them to help debug these
    # when running via our (= custom from their PoV) testing infra.
    "test_with_main_session"
    # AssertionErrors
    "test_unified_repr"
    "testDictComprehension"
    "testDictComprehensionSimple"
    "testGenerator"
    "testGeneratorComprehension"
    "testListComprehension"
    "testNoneReturn"
    "testSet"
    "testTupleListComprehension"
    "test_newtype"
    "test_pardo_type_inference"
    "test_get_output_batch_type"
    "test_pformat_namedtuple_with_unnamed_fields"
    "test_row_coder_fail_early_bad_schema"
  ];

  meta = {
    description = "Unified model for defining both batch and streaming data-parallel processing pipelines";
    homepage = "https://beam.apache.org/";
    license = lib.licenses.asl20;
    maintainers = with lib.maintainers; [ ndl ];
  };
}