# nixpkgs mirror (for testing)
# Source repository: github.com/NixOS/nixpkgs
# Language: nix
# Package expression for the Apache Beam Python SDK ("apache-beam").
#
# The function takes the Python package set members it needs as arguments
# and returns the result of `buildPythonPackage`. The SDK is built from the
# `sdks/python` subdirectory of the upstream `apache/beam` repository.
{ buildPythonPackage
, cloudpickle
, crcmod
, cython
, dill
, fastavro
, fetchFromGitHub
, fetchpatch
, freezegun
, grpcio
, grpcio-tools
, hdfs
, httplib2
, lib
, mock
, mypy-protobuf
, numpy
, oauth2client
, orjson
, pandas
, parameterized
, proto-plus
, protobuf
, psycopg2
, pyarrow
, pydot
, pyhamcrest
, pymongo
, pytestCheckHook
, python
, pythonAtLeast
, python-dateutil
, pytz
, pyyaml
, requests
, requests-mock
, setuptools
, sqlalchemy
, tenacity
, typing-extensions
}:

buildPythonPackage rec {
  pname = "apache-beam";
  version = "2.37.0";
  # Not built for Python 3.10 and newer.
  disabled = pythonAtLeast "3.10";

  src = fetchFromGitHub {
    owner = "apache";
    repo = "beam";
    rev = "v${version}";
    sha256 = "sha256-FmfTxRLqXUHhhAZIxCRx2+phX0bmU5rIHaftBU4yBJY=";
  };

  patches = [
    # patch in the pyarrow.Table.to_batches(max_chunksize=...) argument fix
    (fetchpatch {
      url = "https://github.com/apache/beam/commit/2418a14ee99ff490d1c82944043f97f37ec97a85.patch";
      sha256 = "sha256-G8ARBBf7nmF46P2ncnlteGFnPWq5iCqZDfuaosre9jY=";
      # NOTE(review): presumably strips the leading `sdks/python/` from the
      # patch paths so they apply inside `sourceRoot` -- confirm against the
      # upstream commit.
      stripLen = 2;
    })
  ];

  # Drop the version constraints on dill, httplib2 and pyarrow in setup.py.
  # See https://github.com/NixOS/nixpkgs/issues/156957.
  postPatch = ''
    substituteInPlace setup.py \
      --replace "dill>=0.3.1.1,<0.3.2" "dill" \
      --replace "httplib2>=0.8,<0.20.0" "httplib2" \
      --replace "pyarrow>=0.15.1,<7.0.0" "pyarrow"
  '';

  # Build from the Python SDK subdirectory of the fetched source tree.
  sourceRoot = "source/sdks/python";

  nativeBuildInputs = [
    cython
    grpcio-tools
    mypy-protobuf
  ];

  propagatedBuildInputs = [
    cloudpickle
    crcmod
    cython
    dill
    fastavro
    grpcio
    hdfs
    httplib2
    numpy
    oauth2client
    orjson
    proto-plus
    protobuf
    pyarrow
    pydot
    pymongo
    python-dateutil
    pytz
    requests
    setuptools
    typing-extensions
  ];

  pythonImportsCheck = [
    "apache_beam"
  ];

  checkInputs = [
    freezegun
    mock
    pandas
    parameterized
    psycopg2
    pyhamcrest
    pytestCheckHook
    pyyaml
    requests-mock
    sqlalchemy
    tenacity
  ];

  # Make sure we're running the tests for the actually installed
  # package, so that cython's .so files are available.
  preCheck = ''
    cd "$out/${python.sitePackages}"
  '';

  disabledTestPaths = [
    # Fails with
    # _______ ERROR collecting apache_beam/io/external/xlang_jdbcio_it_test.py _______
    # apache_beam/io/external/xlang_jdbcio_it_test.py:80: in <module>
    # class CrossLanguageJdbcIOTest(unittest.TestCase):
    # apache_beam/io/external/xlang_jdbcio_it_test.py:99: in CrossLanguageJdbcIOTest
    # container_init: Callable[[], Union[PostgresContainer, MySqlContainer]],
    # E NameError: name 'MySqlContainer' is not defined
    #
    # Test relies on the testcontainers package, which is not currently (as of
    # 2022-04-08) available in nixpkgs.
    "apache_beam/io/external/xlang_jdbcio_it_test.py"

    # These tests depend on the availability of specific server backends.
    "apache_beam/runners/portability/flink_runner_test.py"
    "apache_beam/runners/portability/samza_runner_test.py"
    "apache_beam/runners/portability/spark_runner_test.py"
  ];

  disabledTests = [
    # The reasons for these test failures are unclear.
    # They reproduce in Docker with Ubuntu 22.04
    # (= they're not `nixpkgs`-specific) but given that upstream uses
    # quite elaborate testing infra with containers and multiple
    # different runners - I don't expect them to help debug these
    # when running via our (= custom from their PoV) testing infra.
    "test_with_main_session"
  ];

  meta = {
    description = "Unified model for defining both batch and streaming data-parallel processing pipelines";
    homepage = "https://beam.apache.org/";
    license = lib.licenses.asl20;
    maintainers = with lib.maintainers; [ ndl ];
  };
}