1{ lib
2, stdenv
3, python
4, buildPythonPackage
5, fetchFromGitHub
6, alembic
7, argcomplete
8, asgiref
9, attrs
10, blinker
11, cached-property
12, cattrs
13, clickclick
14, colorlog
15, configupdater
16, connexion
17, cron-descriptor
18, croniter
19, cryptography
20, deprecated
21, dill
22, flask
23, flask-login
24, flask-appbuilder
25, flask-caching
26, flask-session
27, flask-wtf
28, gitpython
29, google-re2
30, graphviz
31, gunicorn
32, httpx
33, iso8601
34, importlib-resources
35, importlib-metadata
36, inflection
37, itsdangerous
38, jinja2
39, jsonschema
40, lazy-object-proxy
41, linkify-it-py
42, lockfile
43, markdown
44, markupsafe
45, marshmallow-oneofschema
46, mdit-py-plugins
47, numpy
48, openapi-spec-validator
49, opentelemetry-api
50, opentelemetry-exporter-otlp
51, pandas
52, pathspec
53, pendulum
54, psutil
55, pydantic
56, pygments
57, pyjwt
58, python-daemon
59, python-dateutil
60, python-nvd3
61, python-slugify
62, python3-openid
63, pythonOlder
64, pyyaml
65, rich
66, rich-argparse
67, setproctitle
68, sqlalchemy
69, sqlalchemy-jsonfield
70, swagger-ui-bundle
71, tabulate
72, tenacity
73, termcolor
74, typing-extensions
75, unicodecsv
76, werkzeug
77, freezegun
78, pytest-asyncio
79, pytestCheckHook
80, time-machine
81, mkYarnPackage
82, fetchYarnDeps
83, writeScript
84
85# Extra airflow providers to enable
86, enabledProviders ? []
87}:
88let
89 version = "2.7.3";
90
# Upstream Airflow source tree for the release tag matching `version`.
airflow-src = fetchFromGitHub {
  repo = "airflow";
  owner = "apache";
  rev = "refs/tags/${version}";
  hash = "sha256-+YbiKFZLigSDbHPaUKIl97kpezW1rIt/j09MMa6lwhQ=";
  # Fetch through git instead of the GitHub archive tarball: the tarballs
  # do not appear to contain the test suite.
  forceFetchGit = true;
};
100
# Airflow bundles a web interface that upstream builds with webpack via an
# undocumented shell script in its source tree. This derivation replicates
# that script, working around bugs in the upstream yarn.lock and package.json.
#
# Only the compiled assets (static/dist) are installed; the main package
# copies them into its site-packages directory.
airflow-frontend = mkYarnPackage rec {
  name = "airflow-frontend";

  src = "${airflow-src}/airflow/www";
  # The package.json kept next to this expression is used instead of the one
  # found in src.
  packageJSON = ./package.json;

  offlineCache = fetchYarnDeps {
    yarnLock = "${src}/yarn.lock";
    hash = "sha256-WQKuQgNp35fU6z7owequXOSwoUGJDJYcUgkjPDMOops=";
  };

  # Reduce the dist phase to a no-op.
  distPhase = "true";

  # The webpack license plugin tries to create /licenses when given the
  # original relative path
  postPatch = ''
    sed -i 's!../../../../licenses/LICENSES-ui.txt!licenses/LICENSES-ui.txt!' webpack.config.js
  '';

  # Each overridden phase calls its pre/post hooks so that hooks added via
  # overrideAttrs are still executed.
  configurePhase = ''
    runHook preConfigure

    # $node_modules is presumably provided by mkYarnPackage — TODO confirm
    cp -r $node_modules node_modules

    runHook postConfigure
  '';

  buildPhase = ''
    runHook preBuild

    yarn --offline build
    # Record checksums of the frontend inputs next to the compiled assets.
    find package.json yarn.lock static/css static/js -type f | sort | xargs md5sum > static/dist/sum.md5

    runHook postBuild
  '';

  installPhase = ''
    runHook preInstall

    mkdir -p $out/static/
    cp -r static/dist $out/static

    runHook postInstall
  '';
};
137
# Provider metadata (dependencies and importable modules) comes from a
# generated file. Providers are switched on through the enabledProviders
# argument of this expression.
providers = import ./providers.nix;

# Python packages required by a single provider, looked up by attribute name
# in python.pkgs.
getProviderDeps = name:
  map (pkgName: builtins.getAttr pkgName python.pkgs) (providers.${name}.deps);

# Module names to import-check for a single provider.
getProviderImports = name:
  (providers.${name}).imports;

# Aggregated over every enabled provider.
providerDependencies = lib.concatMap getProviderDeps enabledProviders;
providerImports = lib.concatMap getProviderImports enabledProviders;
145in
# The Airflow Python package itself. The prebuilt web frontend is copied into
# the installed package, and a small subset of the upstream test suite is run
# against a freshly initialised database.
buildPythonPackage rec {
  pname = "apache-airflow";
  inherit version;
  src = airflow-src;

  # Airflow 2.7 dropped support for Python 3.7.
  disabled = pythonOlder "3.8";

  propagatedBuildInputs = [
    alembic
    argcomplete
    asgiref
    attrs
    blinker
    cached-property
    cattrs
    clickclick
    colorlog
    configupdater
    connexion
    cron-descriptor
    croniter
    cryptography
    deprecated
    dill
    flask
    flask-appbuilder
    flask-caching
    flask-session
    flask-wtf
    flask-login
    gitpython
    google-re2
    graphviz
    gunicorn
    httpx
    iso8601
    importlib-resources
    inflection
    itsdangerous
    jinja2
    jsonschema
    lazy-object-proxy
    linkify-it-py
    lockfile
    markdown
    markupsafe
    marshmallow-oneofschema
    mdit-py-plugins
    numpy
    openapi-spec-validator
    opentelemetry-api
    opentelemetry-exporter-otlp
    pandas
    pathspec
    pendulum
    psutil
    pydantic
    pygments
    pyjwt
    python-daemon
    python-dateutil
    python-nvd3
    python-slugify
    python3-openid
    pyyaml
    rich
    rich-argparse
    setproctitle
    sqlalchemy
    sqlalchemy-jsonfield
    swagger-ui-bundle
    tabulate
    tenacity
    termcolor
    typing-extensions
    unicodecsv
    werkzeug
  ] ++ lib.optionals (pythonOlder "3.9") [
    importlib-metadata
  ] ++ providerDependencies;

  buildInputs = [
    airflow-frontend
  ];

  nativeCheckInputs = [
    freezegun
    pytest-asyncio
    pytestCheckHook
    time-machine
  ];

  # By default, source code of providers is included but unusable due to missing
  # transitive dependencies. To enable a provider, add it to the
  # enabledProviders argument of this expression.
  INSTALL_PROVIDERS_FROM_SOURCES = "true";

  postPatch = ''
    # https://github.com/apache/airflow/issues/33854
    substituteInPlace pyproject.toml \
      --replace '[project]' $'[project]\nname = "apache-airflow"\nversion = "${version}"'
  '' + lib.optionalString stdenv.isDarwin ''
    # Fix failing test on Hydra
    substituteInPlace airflow/utils/db.py \
      --replace "/tmp/sqlite_default.db" "$TMPDIR/sqlite_default.db"
  '';

  # NOTE(review): this presumably relies on pythonRelaxDepsHook being active
  # for the build — confirm against the nixpkgs revision in use.
  pythonRelaxDeps = [
    "colorlog"
    "flask-appbuilder"
    "opentelemetry-api"
    "pathspec"
  ];

  # allow for gunicorn processes to have access to Python packages
  makeWrapperArgs = [
    "--prefix PYTHONPATH : $PYTHONPATH"
  ];

  postInstall = ''
    # Ship the prebuilt web UI assets inside the installed package.
    cp -rv ${airflow-frontend}/static/dist $out/${python.sitePackages}/airflow/www/static
    # Needed for pythonImportsCheck below
    export HOME=$(mktemp -d)
  '';

  pythonImportsCheck = [
    "airflow"
  ] ++ providerImports;

  # Point Airflow at the temporary HOME and initialise a scratch database
  # before the tests run.
  preCheck = ''
    export AIRFLOW_HOME=$HOME
    export AIRFLOW__CORE__UNIT_TEST_MODE=True
    export AIRFLOW_DB="$HOME/airflow.db"
    export PATH=$PATH:$out/bin

    airflow version
    airflow db init
    airflow db reset -y
  '';

  # Only the core test module is run.
  pytestFlagsArray = [
    "tests/core/test_core.py"
  ];

  disabledTests = lib.optionals stdenv.isDarwin [
    "bash_operator_kill" # psutil.AccessDenied
  ];

  # Bumps the package version, refreshes the frontend's yarn.lock and
  # package.json (regenerating yarn.nix from them), and regenerates the
  # provider metadata.
  passthru.updateScript = writeScript "update.sh" ''
    #!/usr/bin/env nix-shell
    #!nix-shell -i bash -p common-updater-scripts curl pcre "python3.withPackages (ps: with ps; [ pyyaml ])" yarn2nix

    set -euo pipefail

    # Get new version
    new_version="$(curl -s https://airflow.apache.org/docs/apache-airflow/stable/release_notes.html |
      pcregrep -o1 'Airflow ([0-9.]+).' | head -1)"
    update-source-version ${pname} "$new_version"

    # Update frontend
    cd ./pkgs/servers/apache-airflow
    curl -O https://raw.githubusercontent.com/apache/airflow/$new_version/airflow/www/yarn.lock
    curl -O https://raw.githubusercontent.com/apache/airflow/$new_version/airflow/www/package.json
    yarn2nix > yarn.nix

    # update provider dependencies
    ./update-providers.py
  '';

  # Note on testing the web UI:
  # You can (manually) test the web UI as follows:
  #
  #   nix shell .#apache-airflow
  #   airflow db reset  # WARNING: this will wipe any existing db state you might have!
  #   airflow db init
  #   airflow standalone
  #
  # Then navigate to the localhost URL using the credentials printed, try
  # triggering the 'example_bash_operator' and 'example_python_operator' DAGs
  # and see if they report success.

  meta = with lib; {
    description = "Programmatically author, schedule and monitor data pipelines";
    homepage = "https://airflow.apache.org/";
    license = licenses.asl20;
    maintainers = with maintainers; [ bhipple gbpdt ingenieroariel ];
    knownVulnerabilities = [
      "CVE-2023-50943"
      "CVE-2023-50944"
    ];
  };
}