# TensorFlow built from source with Bazel.
#
# The argument set is grouped into: build helpers, the Python toolchain,
# Python libraries, common build tools, common C/C++ libraries, feature
# switches (CUDA, MKL, TensorBoard, XLA, CPU extensions) and Darwin-only deps.
{ stdenv, bazel_5, buildBazelPackage, isPy3k, lib, fetchFromGitHub, symlinkJoin
, addOpenGLRunpath, fetchpatch
# Python deps
, buildPythonPackage, pythonOlder, python
# Python libraries
, numpy, tensorboard, absl-py
, packaging, setuptools, wheel, keras, keras-preprocessing, google-pasta
, opt-einsum, astunparse, h5py
, termcolor, grpcio, six, wrapt, protobuf-python, tensorflow-estimator-bin
, dill, flatbuffers-python, portpicker, tblib, typing-extensions
# Common deps
, git, pybind11, which, binutils, glibcLocales, cython, perl, coreutils
# Common libraries
, jemalloc, mpi, gast, grpc, sqlite, boringssl, jsoncpp, nsync
, curl, snappy, flatbuffers-core, lmdb-core, icu, double-conversion, libpng, libjpeg_turbo, giflib, protobuf-core
# Upstream by default includes cuda support since tensorflow 1.15. We could do
# that in nix as well. It would make some things easier and less confusing, but
# it would also make the default tensorflow package unfree. See
# https://groups.google.com/a/tensorflow.org/forum/#!topic/developers/iRCt5m4qUz0
, cudaSupport ? false
, cudaPackages ? { }
# TF_CUDA_COMPUTE_CAPABILITIES is computed from this list even for CPU-only
# builds, so fall back to an empty list when `cudaPackages` is left at its
# `{ }` default instead of failing on the missing `cudaFlags` attribute.
, cudaCapabilities ? cudaPackages.cudaFlags.cudaCapabilities or [ ]
, mklSupport ? false, mkl
, tensorboardSupport ? true
# XLA without CUDA is broken
, xlaSupport ? cudaSupport
, sse42Support ? stdenv.hostPlatform.sse4_2Support
, avx2Support ? stdenv.hostPlatform.avx2Support
, fmaSupport ? stdenv.hostPlatform.fmaSupport
# Darwin deps
, Foundation, Security, cctools, llvmPackages_11
}:
33
let
  # Remember the stdenv we were called with; the name `stdenv` is rebound in
  # the next `let`, which could not refer to the argument otherwise.
  originalStdenv = stdenv;
in
let
  # Tensorflow consults many toolchain-related variables, and they may
  # disagree with each other.
  #
  # CUDA builds:
  #   1. NVCC needs a compatible host compiler (e.g. gcc11 for cuda11).
  #   2. Ordinary C++ sources should go through that same toolchain to avoid
  #      odd dynamic-linking failures at runtime (possibly not strictly
  #      required).
  #
  # Darwin builds:
  #   clang 7 does not emit a symbol for
  #   __ZN4llvm11SmallPtrSetIPKNS_10AllocaInstELj8EED1Ev in any translation
  #   unit, which breaks the link step.
  stdenv =
    if cudaSupport then
      cudaPackages.backendStdenv
    else if originalStdenv.isDarwin then
      llvmPackages_11.stdenv
    else
      originalStdenv;

  # CUDA components, taken from the selected CUDA package set.
  cudatoolkit = cudaPackages.cudatoolkit;
  cudnn = cudaPackages.cudnn;
  nccl = cudaPackages.nccl;
in
57
# A CUDA build needs both the toolkit and cuDNN.
assert cudaSupport -> (cudatoolkit != null && cudnn != null);

# unsupported combination: CUDA on Darwin
assert cudaSupport -> !stdenv.isDarwin;
63
64let
65 withTensorboard = (pythonOlder "3.6") || tensorboardSupport;
66
67 # FIXME: migrate to redist cudaPackages
68 cudatoolkit_joined = symlinkJoin {
69 name = "${cudatoolkit.name}-merged";
70 paths = [
71 cudatoolkit.lib
72 cudatoolkit.out
73 ] ++ lib.optionals (lib.versionOlder cudatoolkit.version "11") [
74 # for some reason some of the required libs are in the targets/x86_64-linux
75 # directory; not sure why but this works around it
76 "${cudatoolkit}/targets/${stdenv.system}"
77 ];
78 };
79
80 # Tensorflow expects bintools at hard-coded paths, e.g. /usr/bin/ar
81 # The only way to overcome that is to set GCC_HOST_COMPILER_PREFIX,
82 # but that path must contain cc as well, so we merge them
83 cudatoolkit_cc_joined = symlinkJoin {
84 name = "${stdenv.cc.name}-merged";
85 paths = [
86 stdenv.cc
87 binutils.bintools # for ar, dwp, nm, objcopy, objdump, strip
88 ];
89 };
90
91 # Needed for _some_ system libraries, grep INCLUDEDIR.
92 includes_joined = symlinkJoin {
93 name = "tensorflow-deps-merged";
94 paths = [
95 jsoncpp
96 ];
97 };
98
99 tfFeature = x: if x then "1" else "0";
100
101 version = "2.11.1";
102 variant = lib.optionalString cudaSupport "-gpu";
103 pname = "tensorflow${variant}";
104
105 pythonEnv = python.withPackages (_:
106 [ # python deps needed during wheel build time (not runtime, see the buildPythonPackage part for that)
107 # This list can likely be shortened, but each trial takes multiple hours so won't bother for now.
108 absl-py
109 astunparse
110 dill
111 flatbuffers-python
112 gast
113 google-pasta
114 grpcio
115 h5py
116 keras-preprocessing
117 numpy
118 opt-einsum
119 packaging
120 protobuf-python
121 setuptools
122 six
123 tblib
124 tensorboard
125 tensorflow-estimator-bin
126 termcolor
127 typing-extensions
128 wheel
129 wrapt
130 ]);
131
  # Patched copy of the `rules_cc` directory taken from the fetched Bazel
  # dependencies (`_bazel-build.deps`). It is handed to Bazel through
  # `--override_repository=rules_cc=...` in the Darwin variant of `bazel-build`.
  rules_cc_darwin_patched = stdenv.mkDerivation {
    name = "rules_cc-${pname}-${version}";

    src = _bazel-build.deps;

    # The patches are relative to the rules_cc checkout, so enter that
    # directory before patching and leave it again afterwards (postPatch).
    prePatch = "pushd rules_cc";
    patches = [
      # https://github.com/bazelbuild/rules_cc/issues/122
      (fetchpatch {
        name = "tensorflow-rules_cc-libtool-path.patch";
        url = "https://github.com/bazelbuild/rules_cc/commit/8c427ab30bf213630dc3bce9d2e9a0e29d1787db.diff";
        hash = "sha256-C4v6HY5+jm0ACUZ58gBPVejCYCZfuzYKlHZ0m2qDHCk=";
      })

      # https://github.com/bazelbuild/rules_cc/pull/124
      (fetchpatch {
        name = "tensorflow-rules_cc-install_name_tool-path.patch";
        url = "https://github.com/bazelbuild/rules_cc/commit/156497dc89100db8a3f57b23c63724759d431d05.diff";
        hash = "sha256-NES1KeQmMiUJQVoV6dS4YGRxxkZEjOpFSCyOq9HZYO0=";
      })
    ];
    postPatch = "popd";

    # Nothing to configure or compile: the output is just the patched sources.
    dontConfigure = true;
    dontBuild = true;

    # The patched rules_cc directory itself becomes $out.
    installPhase = ''
      runHook preInstall

      mv rules_cc/ "$out"

      runHook postInstall
    '';
  };
  # Patched copy of the `llvm-raw` directory taken from the fetched Bazel
  # dependencies (`_bazel-build.deps`). It is handed to Bazel through
  # `--override_repository=llvm-raw=...` in the Darwin variant of `bazel-build`.
  llvm-raw_darwin_patched = stdenv.mkDerivation {
    name = "llvm-raw-${pname}-${version}";

    src = _bazel-build.deps;

    # Patch from inside the llvm-raw checkout; postPatch leaves it again.
    prePatch = "pushd llvm-raw";
    patches = [
      # Fix a vendored config.h that requires the 10.13 SDK
      ./llvm_bazel_fix_macos_10_12_sdk.patch
    ];
    # Creates (or refreshes) BUILD and WORKSPACE files inside llvm-raw before
    # leaving the directory.
    # NOTE(review): presumably needed so Bazel accepts the directory as a
    # repository override — confirm.
    postPatch = ''
      touch {BUILD,WORKSPACE}
      popd
    '';

    # Nothing to configure or compile: the output is just the patched sources.
    dontConfigure = true;
    dontBuild = true;

    # The patched llvm-raw directory itself becomes $out.
    installPhase = ''
      runHook preInstall

      mv llvm-raw/ "$out"

      runHook postInstall
    '';
  };
  # The Bazel build that the Python package below consumes.
  # On Darwin, `_bazel-build` is overridden so that Bazel uses the patched
  # `rules_cc` and `llvm-raw` repositories and AR points at the cctools
  # libtool; on every other platform `_bazel-build` is used unchanged.
  bazel-build = if stdenv.isDarwin then _bazel-build.overrideAttrs (prev: {
    bazelFlags = prev.bazelFlags ++ [
      "--override_repository=rules_cc=${rules_cc_darwin_patched}"
      "--override_repository=llvm-raw=${llvm-raw_darwin_patched}"
    ];
    # NOTE(review): this sets preBuild outright instead of appending to
    # prev.preBuild — confirm that replacing any earlier preBuild is intended.
    preBuild = ''
      export AR="${cctools}/bin/libtool"
    '';
  }) else _bazel-build;
201
202 _bazel-build = buildBazelPackage.override { inherit stdenv; } {
203 name = "${pname}-${version}";
204 bazel = bazel_5;
205
206 src = fetchFromGitHub {
207 owner = "tensorflow";
208 repo = "tensorflow";
209 rev = "refs/tags/v${version}";
210 hash = "sha256-q59cUW6613byHk4LGl+sefO5czLSWxOrSyLbJ1pkNEY=";
211 };
212
213 # On update, it can be useful to steal the changes from gentoo
214 # https://gitweb.gentoo.org/repo/gentoo.git/tree/sci-libs/tensorflow
215
216 nativeBuildInputs = [
217 which pythonEnv cython perl protobuf-core
218 ] ++ lib.optional cudaSupport addOpenGLRunpath;
219
220 buildInputs = [
221 jemalloc
222 mpi
223 glibcLocales
224 git
225
226 # libs taken from system through the TF_SYS_LIBS mechanism
227 boringssl
228 curl
229 double-conversion
230 flatbuffers-core
231 giflib
232 grpc
233 # Necessary to fix the "`GLIBCXX_3.4.30' not found" error
234 (icu.override { inherit stdenv; })
235 jsoncpp
236 libjpeg_turbo
237 libpng
238 lmdb-core
239 (pybind11.overridePythonAttrs (_: { inherit stdenv; }))
240 snappy
241 sqlite
242 ] ++ lib.optionals cudaSupport [
243 cudatoolkit
244 cudnn
245 ] ++ lib.optionals mklSupport [
246 mkl
247 ] ++ lib.optionals stdenv.isDarwin [
248 Foundation
249 Security
250 ] ++ lib.optionals (!stdenv.isDarwin) [
251 nsync
252 ];
253
254 # arbitrarily set to the current latest bazel version, overly careful
255 TF_IGNORE_MAX_BAZEL_VERSION = true;
256
257 LIBTOOL = lib.optionalString stdenv.isDarwin "${cctools}/bin/libtool";
258
259 # Take as many libraries from the system as possible. Keep in sync with
260 # list of valid syslibs in
261 # https://github.com/tensorflow/tensorflow/blob/master/third_party/systemlibs/syslibs_configure.bzl
262 TF_SYSTEM_LIBS = lib.concatStringsSep "," ([
263 "absl_py"
264 "astor_archive"
265 "astunparse_archive"
266 "boringssl"
267 # Not packaged in nixpkgs
268 # "com_github_googleapis_googleapis"
269 # "com_github_googlecloudplatform_google_cloud_cpp"
270 "com_github_grpc_grpc"
271 "com_google_protobuf"
272 # Fails with the error: external/org_tensorflow/tensorflow/core/profiler/utils/tf_op_utils.cc:46:49: error: no matching function for call to 're2::RE2::FullMatch(absl::lts_2020_02_25::string_view&, re2::RE2&)'
273 # "com_googlesource_code_re2"
274 "curl"
275 "cython"
276 "dill_archive"
277 "double_conversion"
278 "flatbuffers"
279 "functools32_archive"
280 "gast_archive"
281 "gif"
282 "hwloc"
283 "icu"
284 "jsoncpp_git"
285 "libjpeg_turbo"
286 "lmdb"
287 "nasm"
288 "opt_einsum_archive"
289 "org_sqlite"
290 "pasta"
291 "png"
292 "pybind11"
293 "six_archive"
294 "snappy"
295 "tblib_archive"
296 "termcolor_archive"
297 "typing_extensions_archive"
298 "wrapt"
299 "zlib"
300 ] ++ lib.optionals (!stdenv.isDarwin) [
301 "nsync" # fails to build on darwin
302 ]);
303
304 INCLUDEDIR = "${includes_joined}/include";
305
306 # This is needed for the Nix-provided protobuf dependency to work,
307 # as otherwise the rule `link_proto_files` tries to create the links
308 # to `/usr/include/...` which results in build failures.
309 PROTOBUF_INCLUDE_PATH = "${protobuf-core}/include";
310
311 PYTHON_BIN_PATH = pythonEnv.interpreter;
312
313 TF_NEED_GCP = true;
314 TF_NEED_HDFS = true;
315 TF_ENABLE_XLA = tfFeature xlaSupport;
316
317 CC_OPT_FLAGS = " ";
318
319 # https://github.com/tensorflow/tensorflow/issues/14454
320 TF_NEED_MPI = tfFeature cudaSupport;
321
322 TF_NEED_CUDA = tfFeature cudaSupport;
323 TF_CUDA_PATHS = lib.optionalString cudaSupport "${cudatoolkit_joined},${cudnn},${nccl}";
324 TF_CUDA_COMPUTE_CAPABILITIES = lib.concatStringsSep "," cudaCapabilities;
325
326 # Needed even when we override stdenv: e.g. for ar
327 GCC_HOST_COMPILER_PREFIX = lib.optionalString cudaSupport "${cudatoolkit_cc_joined}/bin";
328 GCC_HOST_COMPILER_PATH = lib.optionalString cudaSupport "${cudatoolkit_cc_joined}/bin/cc";
329
330 postPatch = ''
331 # bazel 3.3 should work just as well as bazel 3.1
332 rm -f .bazelversion
333 patchShebangs .
334 '' + lib.optionalString (stdenv.hostPlatform.system == "x86_64-darwin") ''
335 cat ${./com_google_absl_fix_macos.patch} >> third_party/absl/com_google_absl_fix_mac_and_nvcc_build.patch
336 '' + lib.optionalString (!withTensorboard) ''
337 # Tensorboard pulls in a bunch of dependencies, some of which may
338 # include security vulnerabilities. So we make it optional.
339 # https://github.com/tensorflow/tensorflow/issues/20280#issuecomment-400230560
340 sed -i '/tensorboard ~=/d' tensorflow/tools/pip_package/setup.py
341 '';
342
343 # https://github.com/tensorflow/tensorflow/pull/39470
344 env.NIX_CFLAGS_COMPILE = toString [ "-Wno-stringop-truncation" ];
345
346 preConfigure = let
347 opt_flags = []
348 ++ lib.optionals sse42Support ["-msse4.2"]
349 ++ lib.optionals avx2Support ["-mavx2"]
350 ++ lib.optionals fmaSupport ["-mfma"];
351 in ''
352 patchShebangs configure
353
354 # dummy ldconfig
355 mkdir dummy-ldconfig
356 echo "#!${stdenv.shell}" > dummy-ldconfig/ldconfig
357 chmod +x dummy-ldconfig/ldconfig
358 export PATH="$PWD/dummy-ldconfig:$PATH"
359
360 export PYTHON_LIB_PATH="$NIX_BUILD_TOP/site-packages"
361 export CC_OPT_FLAGS="${lib.concatStringsSep " " opt_flags}"
362 mkdir -p "$PYTHON_LIB_PATH"
363
364 # To avoid mixing Python 2 and Python 3
365 unset PYTHONPATH
366 '';
367
368 configurePhase = ''
369 runHook preConfigure
370 ./configure
371 runHook postConfigure
372 '';
373
374 hardeningDisable = [ "format" ];
375
376 bazelBuildFlags = [
377 "--config=opt" # optimize using the flags set in the configure phase
378 ]
379 ++ lib.optionals stdenv.cc.isClang [
380 "--cxxopt=-x" "--cxxopt=c++"
381 "--host_cxxopt=-x" "--host_cxxopt=c++"
382
383 # workaround for https://github.com/bazelbuild/bazel/issues/15359
384 "--spawn_strategy=sandboxed"
385 ]
386 ++ lib.optionals (mklSupport) [ "--config=mkl" ];
387
388 bazelTargets = [ "//tensorflow/tools/pip_package:build_pip_package //tensorflow/tools/lib_package:libtensorflow" ];
389
390 removeRulesCC = false;
391 # Without this Bazel complaints about sandbox violations.
392 dontAddBazelOpts = true;
393
394 fetchAttrs = {
395 sha256 = {
396 x86_64-linux = if cudaSupport
397 then "sha256-lURiR0Ra4kynDXyfuONG+A7CpxnAsfKzIdFTExKzp1o="
398 else "sha256-lDvRgj+UlaneRGZOO9UVCb6uyxcbRJfUhABf/sgKPi0=";
399 aarch64-linux = "sha256-z2d45fqHz5HW+qkv3fR9hMg3sEwUzJfxF54vng85bHk=";
400 x86_64-darwin = "sha256-AAvuz8o6ZRkaSYMgaep74lDDQcxOupDCX4vRaK/jnCU=";
401 aarch64-darwin = "sha256-kexRSvfQqb92ZRuUqAO070RnUUBidAqghiA7Y8do9vc=";
402 }.${stdenv.hostPlatform.system} or (throw "unsupported system ${stdenv.hostPlatform.system}");
403 };
404
405 buildAttrs = {
406 outputs = [ "out" "python" ];
407
408 preBuild = ''
409 patchShebangs .
410 '';
411
412 installPhase = ''
413 mkdir -p "$out"
414 tar -xf bazel-bin/tensorflow/tools/lib_package/libtensorflow.tar.gz -C "$out"
415 # Write pkgconfig file.
416 mkdir "$out/lib/pkgconfig"
417 cat > "$out/lib/pkgconfig/tensorflow.pc" << EOF
418 Name: TensorFlow
419 Version: ${version}
420 Description: Library for computation using data flow graphs for scalable machine learning
421 Requires:
422 Libs: -L$out/lib -ltensorflow
423 Cflags: -I$out/include/tensorflow
424 EOF
425
426 # build the source code, then copy it to $python (build_pip_package
427 # actually builds a symlink farm so we must dereference them).
428 bazel-bin/tensorflow/tools/pip_package/build_pip_package --src "$PWD/dist"
429 cp -Lr "$PWD/dist" "$python"
430 '';
431
432 postFixup = lib.optionalString cudaSupport ''
433 find $out -type f \( -name '*.so' -or -name '*.so.*' \) | while read lib; do
434 addOpenGLRunpath "$lib"
435 done
436 '';
437
438 requiredSystemFeatures = [
439 "big-parallel"
440 ];
441 };
442
443 meta = with lib; {
444 changelog = "https://github.com/tensorflow/tensorflow/releases/tag/v${version}";
445 description = "Computation using data flow graphs for scalable machine learning";
446 homepage = "http://tensorflow.org";
447 license = licenses.asl20;
448 maintainers = with maintainers; [ abbradar ];
449 platforms = with platforms; linux ++ darwin;
450 broken = !(xlaSupport -> cudaSupport);
451 } // lib.optionalAttrs stdenv.isDarwin {
452 timeout = 86400; # 24 hours
453 maxSilent = 14400; # 4h, double the default of 7200s
454 };
455 };
456
# The Python package proper: installs the pip package tree produced by the
# Bazel build (`bazel-build.python`) and declares its runtime dependencies.
in buildPythonPackage {
  inherit version pname;
  disabled = !isPy3k;

  src = bazel-build.python;

  # Adjust dependency requirements:
  # - Drop tensorflow-io dependency until we get it to build
  # - Relax flatbuffers, gast and protobuf version requirements
  # - The purpose of python3Packages.libclang is not clear at the moment and we don't have it packaged yet
  # - keras and tensorflow-io-gcs-filesystem will be considered as optional for now.
  #
  # The sed command must not end in a line continuation, otherwise anything
  # appended to postPatch later would be swallowed as a sed argument.
  postPatch = ''
    sed -i setup.py \
      -e '/tensorflow-io-gcs-filesystem/,+1d' \
      -e "s/'flatbuffers[^']*',/'flatbuffers',/" \
      -e "s/'gast[^']*',/'gast',/" \
      -e "/'libclang[^']*',/d" \
      -e "/'keras[^']*')\?,/d" \
      -e "/'tensorflow-io-gcs-filesystem[^']*',/d" \
      -e "s/'protobuf[^']*',/'protobuf',/"
  '';

  # Upstream has a pip hack that results in bin/tensorboard being in both tensorflow
  # and the propagated input tensorboard, which causes environment collisions.
  # Another possibility would be to have tensorboard only in the buildInputs
  # https://github.com/tensorflow/tensorflow/blob/v1.7.1/tensorflow/tools/pip_package/setup.py#L79
  postInstall = ''
    rm "$out/bin/tensorboard"
  '';

  setupPyGlobalFlags = [ "--project_name ${pname}" ];

  # tensorflow/tools/pip_package/setup.py
  propagatedBuildInputs = [
    absl-py
    astunparse
    flatbuffers-python
    gast
    google-pasta
    grpcio
    h5py
    keras-preprocessing
    numpy
    opt-einsum
    packaging
    protobuf-python
    six
    tensorflow-estimator-bin
    termcolor
    typing-extensions
    wrapt
  ] ++ lib.optionals withTensorboard [
    tensorboard
  ];

  nativeBuildInputs = lib.optionals cudaSupport [ addOpenGLRunpath ];

  # CUDA builds: for every shared object, run addOpenGLRunpath and prepend the
  # CUDA toolkit, cuDNN and NCCL library directories to its existing rpath.
  # `IFS= read -r` keeps file names intact.
  postFixup = lib.optionalString cudaSupport ''
    find "$out" -type f \( -name '*.so' -or -name '*.so.*' \) | while IFS= read -r lib; do
      addOpenGLRunpath "$lib"

      patchelf --set-rpath "${cudatoolkit}/lib:${cudatoolkit.lib}/lib:${cudnn}/lib:${nccl}/lib:$(patchelf --print-rpath "$lib")" "$lib"
    done
  '';

  # Actual tests are slow and impure.
  # TODO try to run them anyway
  # TODO better test (files in tensorflow/tools/ci_build/builds/*test)
  # TEST_PACKAGES in tensorflow/tools/pip_package/setup.py
  nativeCheckInputs = [
    dill
    keras
    portpicker
    tblib
  ];
  # Smoke test only: import tensorflow, print a constant and fit a one-unit
  # model for a single epoch. The heredoc terminator must end up in column 0
  # after Nix strips the common indentation.
  checkPhase = ''
    ${python.interpreter} <<EOF
    # A simple "Hello world"
    import tensorflow as tf
    hello = tf.constant("Hello, world!")
    tf.print(hello)

    # Fit a simple model to random data
    import numpy as np
    np.random.seed(0)
    tf.random.set_seed(0)
    model = tf.keras.models.Sequential([
        tf.keras.layers.Dense(1, activation="linear")
    ])
    model.compile(optimizer="sgd", loss="mse")

    x = np.random.uniform(size=(1,1))
    y = np.random.uniform(size=(1,))
    model.fit(x, y, epochs=1)
    EOF
  '';
  # Regression test for #77626 removed because there is no more `tensorflow.contrib`.

  passthru = {
    inherit cudaPackages;
    deps = bazel-build.deps;
    libtensorflow = bazel-build.out;
  };

  inherit (bazel-build) meta;
}