pkgs/development/python-modules/tensorflow/default.nix at 22.05-pre · pyrox.dev/nixpkgs

pyrox.dev / nixpkgs
lol
nixpkgs / pkgs / development / python-modules / tensorflow / default.nix
at 22.05-pre 13 kB view raw
  1{ stdenv, bazel_3, buildBazelPackage, isPy3k, lib, fetchFromGitHub, symlinkJoin
  2, addOpenGLRunpath, fetchpatch
  3# Python deps
  4, buildPythonPackage, pythonOlder, python
  5# Python libraries
  6, numpy, tensorflow-tensorboard_2, absl-py
  7, setuptools, wheel, keras-preprocessing, google-pasta
  8, opt-einsum, astunparse, h5py
  9, termcolor, grpcio, six, wrapt, protobuf, tensorflow-estimator_2
 10, dill, flatbuffers-python, tblib, typing-extensions
 11# Common deps
 12, git, pybind11, which, binutils, glibcLocales, cython, perl
 13# Common libraries
 14, jemalloc, mpi, gast, grpc, sqlite, boringssl, jsoncpp
 15, curl, snappy, flatbuffers-core, lmdb-core, icu, double-conversion, libpng, libjpeg_turbo, giflib
# Upstream by default includes cuda support since tensorflow 1.15. We could do
 17# that in nix as well. It would make some things easier and less confusing, but
 18# it would also make the default tensorflow package unfree. See
 19# https://groups.google.com/a/tensorflow.org/forum/#!topic/developers/iRCt5m4qUz0
 20, cudaSupport ? false, cudatoolkit ? null, cudnn ? null, nccl ? null
 21, mklSupport ? false, mkl ? null
 22, tensorboardSupport ? true
 23# XLA without CUDA is broken
 24, xlaSupport ? cudaSupport
 25# Default from ./configure script
 26, cudaCapabilities ? [ "sm_35" "sm_50" "sm_60" "sm_70" "sm_75" "compute_80" ]
 27, sse42Support ? stdenv.hostPlatform.sse4_2Support
 28, avx2Support  ? stdenv.hostPlatform.avx2Support
 29, fmaSupport   ? stdenv.hostPlatform.fmaSupport
 30# Darwin deps
 31, Foundation, Security
 32}:
 33
 34assert cudaSupport -> cudatoolkit != null
 35                   && cudnn != null;
 36
 37# unsupported combination
 38assert ! (stdenv.isDarwin && cudaSupport);
 39
 40assert mklSupport -> mkl != null;
 41
 42let
 43  withTensorboard = (pythonOlder "3.6") || tensorboardSupport;
 44
 45  cudatoolkit_joined = symlinkJoin {
 46    name = "${cudatoolkit.name}-merged";
 47    paths = [
 48      cudatoolkit.lib
 49      cudatoolkit.out
 50    ] ++ lib.optionals (lib.versionOlder cudatoolkit.version "11") [
 51      # for some reason some of the required libs are in the targets/x86_64-linux
 52      # directory; not sure why but this works around it
 53      "${cudatoolkit}/targets/${stdenv.system}"
 54    ];
 55  };
 56
 57  cudatoolkit_cc_joined = symlinkJoin {
 58    name = "${cudatoolkit.cc.name}-merged";
 59    paths = [
 60      cudatoolkit.cc
 61      binutils.bintools # for ar, dwp, nm, objcopy, objdump, strip
 62    ];
 63  };
 64
 65  # Needed for _some_ system libraries, grep INCLUDEDIR.
 66  includes_joined = symlinkJoin {
 67    name = "tensorflow-deps-merged";
 68    paths = [
 69      jsoncpp
 70    ];
 71  };
 72
 73  tfFeature = x: if x then "1" else "0";
 74
 75  version = "2.4.2";
 76  variant = if cudaSupport then "-gpu" else "";
 77  pname = "tensorflow${variant}";
 78
 79  pythonEnv = python.withPackages (_:
 80    [ # python deps needed during wheel build time (not runtime, see the buildPythonPackage part for that)
 81      # This list can likely be shortened, but each trial takes multiple hours so won't bother for now.
 82      absl-py
 83      astunparse
 84      dill
 85      flatbuffers-python
 86      gast
 87      google-pasta
 88      grpcio
 89      h5py
 90      keras-preprocessing
 91      numpy
 92      opt-einsum
 93      protobuf
 94      setuptools
 95      six
 96      tblib
 97      tensorflow-estimator_2
 98      tensorflow-tensorboard_2
 99      termcolor
100      typing-extensions
101      wheel
102      wrapt
103  ]);
104
105  bazel-build = buildBazelPackage {
106    name = "${pname}-${version}";
107    bazel = bazel_3;
108
109    src = fetchFromGitHub {
110      owner = "tensorflow";
111      repo = "tensorflow";
112      rev = "v${version}";
113      sha256 = "07a2y05hixch1bjag5pzw3p1m7bdj3bq4gdvmsfk2xraz49b1pi8";
114    };
115
116    patches = [
117      # included from 2.6.0 onwards
118      (fetchpatch {
119        name = "fix-numpy-1.20-notimplementederror.patch";
120        url = "https://github.com/tensorflow/tensorflow/commit/b258941525f496763d4277045b6513c815720e3a.patch";
121        sha256 = "19f9bzrcfsynk11s2hqvscin5c65zf7r6g3nb10jnimw79vafiry";
122      })
123      # Relax too strict Python packages versions dependencies.
124      ./relax-dependencies.patch
125      # Add missing `io_bazel_rules_docker` dependency.
126      ./workspace.patch
127    ];
128
129    # On update, it can be useful to steal the changes from gentoo
130    # https://gitweb.gentoo.org/repo/gentoo.git/tree/sci-libs/tensorflow
131
132    nativeBuildInputs = [
133      which pythonEnv cython perl
134    ] ++ lib.optional cudaSupport addOpenGLRunpath;
135
136    buildInputs = [
137      jemalloc
138      mpi
139      glibcLocales
140      git
141
142      # libs taken from system through the TF_SYS_LIBS mechanism
143      grpc
144      sqlite
145      boringssl
146      jsoncpp
147      curl
148      pybind11
149      snappy
150      flatbuffers-core
151      icu
152      double-conversion
153      libpng
154      libjpeg_turbo
155      giflib
156      lmdb-core
157    ] ++ lib.optionals cudaSupport [
158      cudatoolkit
159      cudnn
160    ] ++ lib.optionals mklSupport [
161      mkl
162    ] ++ lib.optionals stdenv.isDarwin [
163      Foundation
164      Security
165    ];
166
167    # arbitrarily set to the current latest bazel version, overly careful
168    TF_IGNORE_MAX_BAZEL_VERSION = true;
169
170    # Take as many libraries from the system as possible. Keep in sync with
171    # list of valid syslibs in
172    # https://github.com/tensorflow/tensorflow/blob/master/third_party/systemlibs/syslibs_configure.bzl
173    TF_SYSTEM_LIBS = lib.concatStringsSep "," [
174      "absl_py"
175      "astor_archive"
176      "astunparse_archive"
177      "boringssl"
178      # Not packaged in nixpkgs
179      # "com_github_googleapis_googleapis"
180      # "com_github_googlecloudplatform_google_cloud_cpp"
181      "com_github_grpc_grpc"
182      # Multiple issues with custom protobuf.
183      # First `com_github_googleapis` fails to configure. Can be worked around by disabling `com_github_googleapis`
184      # and related functionality, but then the next error is about "dangling symbolic link", and in general
185      # looks like that's only the beginning: see
186      # https://stackoverflow.com/questions/55578884/how-to-build-tensorflow-1-13-1-with-custom-protobuf
187      # "com_google_protobuf"
188      # Fails with the error: external/org_tensorflow/tensorflow/core/profiler/utils/tf_op_utils.cc:46:49: error: no matching function for call to 're2::RE2::FullMatch(absl::lts_2020_02_25::string_view&, re2::RE2&)'
189      # "com_googlesource_code_re2"
190      "curl"
191      "cython"
192      "dill_archive"
193      "double_conversion"
194      "enum34_archive"
195      "flatbuffers"
196      "functools32_archive"
197      "gast_archive"
198      "gif"
199      "hwloc"
200      "icu"
201      "jsoncpp_git"
202      "libjpeg_turbo"
203      "lmdb"
204      "nasm"
205      # "nsync" # not packaged in nixpkgs
206      "opt_einsum_archive"
207      "org_sqlite"
208      "pasta"
209      "pcre"
210      "png"
211      "pybind11"
212      "six_archive"
213      "snappy"
214      "tblib_archive"
215      "termcolor_archive"
216      "typing_extensions_archive"
217      "wrapt"
218      "zlib"
219    ];
220
221    INCLUDEDIR = "${includes_joined}/include";
222
223    PYTHON_BIN_PATH = pythonEnv.interpreter;
224
225    TF_NEED_GCP = true;
226    TF_NEED_HDFS = true;
227    TF_ENABLE_XLA = tfFeature xlaSupport;
228
229    CC_OPT_FLAGS = " ";
230
231    # https://github.com/tensorflow/tensorflow/issues/14454
232    TF_NEED_MPI = tfFeature cudaSupport;
233
234    TF_NEED_CUDA = tfFeature cudaSupport;
235    TF_CUDA_PATHS = lib.optionalString cudaSupport "${cudatoolkit_joined},${cudnn},${nccl}";
236    GCC_HOST_COMPILER_PREFIX = lib.optionalString cudaSupport "${cudatoolkit_cc_joined}/bin";
237    GCC_HOST_COMPILER_PATH = lib.optionalString cudaSupport "${cudatoolkit_cc_joined}/bin/gcc";
238    TF_CUDA_COMPUTE_CAPABILITIES = lib.concatStringsSep "," cudaCapabilities;
239
240    postPatch = ''
241      # bazel 3.3 should work just as well as bazel 3.1
242      rm -f .bazelversion
243    '' + lib.optionalString (!withTensorboard) ''
244      # Tensorboard pulls in a bunch of dependencies, some of which may
245      # include security vulnerabilities. So we make it optional.
246      # https://github.com/tensorflow/tensorflow/issues/20280#issuecomment-400230560
247      sed -i '/tensorboard ~=/d' tensorflow/tools/pip_package/setup.py
248    '';
249
250    # https://github.com/tensorflow/tensorflow/pull/39470
251    NIX_CFLAGS_COMPILE = [ "-Wno-stringop-truncation" ];
252
253    preConfigure = let
254      opt_flags = []
255        ++ lib.optionals sse42Support ["-msse4.2"]
256        ++ lib.optionals avx2Support ["-mavx2"]
257        ++ lib.optionals fmaSupport ["-mfma"];
258    in ''
259      patchShebangs configure
260
261      # dummy ldconfig
262      mkdir dummy-ldconfig
263      echo "#!${stdenv.shell}" > dummy-ldconfig/ldconfig
264      chmod +x dummy-ldconfig/ldconfig
265      export PATH="$PWD/dummy-ldconfig:$PATH"
266
267      export PYTHON_LIB_PATH="$NIX_BUILD_TOP/site-packages"
268      export CC_OPT_FLAGS="${lib.concatStringsSep " " opt_flags}"
269      mkdir -p "$PYTHON_LIB_PATH"
270
271      # To avoid mixing Python 2 and Python 3
272      unset PYTHONPATH
273    '';
274
275    configurePhase = ''
276      runHook preConfigure
277      ./configure
278      runHook postConfigure
279    '';
280
281    hardeningDisable = [ "format" ];
282
283    bazelBuildFlags = [
284      "--config=opt" # optimize using the flags set in the configure phase
285    ]
286    ++ lib.optionals stdenv.cc.isClang [ "--cxxopt=-x" "--cxxopt=c++" "--host_cxxopt=-x" "--host_cxxopt=c++" ]
287    ++ lib.optionals (mklSupport) [ "--config=mkl" ];
288
289    bazelTarget = "//tensorflow/tools/pip_package:build_pip_package //tensorflow/tools/lib_package:libtensorflow";
290
291    removeRulesCC = false;
292    # Without this Bazel complaints about sandbox violations.
293    dontAddBazelOpts = true;
294
295    fetchAttrs = {
296      # cudaSupport causes fetch of ncclArchive, resulting in different hashes
297      sha256 = if cudaSupport then
298        "10m6qj3kchgxfgb6qh59vc51knm9r9pkng8bf90h00dnggvv8234"
299      else
300        "04a98yrp09nd0p17k0jbzkgjppxs0yma7m5zkfrwgvr4g0w71v68";
301    };
302
303    buildAttrs = {
304      outputs = [ "out" "python" ];
305
306      preBuild = ''
307        patchShebangs .
308      '';
309
310      installPhase = ''
311        mkdir -p "$out"
312        tar -xf bazel-bin/tensorflow/tools/lib_package/libtensorflow.tar.gz -C "$out"
313        # Write pkgconfig file.
314        mkdir "$out/lib/pkgconfig"
315        cat > "$out/lib/pkgconfig/tensorflow.pc" << EOF
316        Name: TensorFlow
317        Version: ${version}
318        Description: Library for computation using data flow graphs for scalable machine learning
319        Requires:
320        Libs: -L$out/lib -ltensorflow
321        Cflags: -I$out/include/tensorflow
322        EOF
323
324        # build the source code, then copy it to $python (build_pip_package
325        # actually builds a symlink farm so we must dereference them).
326        bazel-bin/tensorflow/tools/pip_package/build_pip_package --src "$PWD/dist"
327        cp -Lr "$PWD/dist" "$python"
328      '';
329
330      postFixup = lib.optionalString cudaSupport ''
331        find $out -type f \( -name '*.so' -or -name '*.so.*' \) | while read lib; do
332          addOpenGLRunpath "$lib"
333        done
334      '';
335
336      requiredSystemFeatures = [
337        "big-parallel"
338      ];
339    };
340
341    meta = with lib; {
342      description = "Computation using data flow graphs for scalable machine learning";
343      homepage = "http://tensorflow.org";
344      license = licenses.asl20;
345      maintainers = with maintainers; [ jyp abbradar ];
346      platforms = with platforms; linux ++ darwin;
347      timeout = 86400; # 24 hours, needed for darwin
348      broken = !(xlaSupport -> cudaSupport);
349    };
350  };
351
# Python package built from the `python` output of bazel-build (the source
# tree produced by build_pip_package).
in buildPythonPackage {
  inherit version pname;
  disabled = !isPy3k;

  src = bazel-build.python;

  # Upstream has a pip hack that results in bin/tensorboard being in both tensorflow
  # and the propagated input tensorflow-tensorboard, which causes environment collisions.
  # Another possibility would be to have tensorboard only in the buildInputs
  # https://github.com/tensorflow/tensorflow/blob/v1.7.1/tensorflow/tools/pip_package/setup.py#L79
  postInstall = ''
    rm "$out/bin/tensorboard"
  '';

  setupPyGlobalFlags = [ "--project_name ${pname}" ];

  # tensorflow/tools/pip_package/setup.py
  propagatedBuildInputs = [
    absl-py
    astunparse
    dill
    flatbuffers-python
    gast
    google-pasta
    grpcio
    h5py
    keras-preprocessing
    numpy
    opt-einsum
    protobuf
    six
    tblib
    tensorflow-estimator_2
    termcolor
    typing-extensions
    wrapt
  ] ++ lib.optionals withTensorboard [
    tensorflow-tensorboard_2
  ];

  nativeBuildInputs = lib.optional cudaSupport addOpenGLRunpath;

  # For CUDA builds, prepend the cudatoolkit, cudnn and nccl library
  # directories to the rpath of every installed shared object, after adding
  # the OpenGL driver runpath. `IFS= read -r` keeps file names intact.
  postFixup = lib.optionalString cudaSupport ''
    find $out -type f \( -name '*.so' -or -name '*.so.*' \) | while IFS= read -r lib; do
      addOpenGLRunpath "$lib"

      patchelf --set-rpath "${cudatoolkit}/lib:${cudatoolkit.lib}/lib:${cudnn}/lib:${nccl}/lib:$(patchelf --print-rpath "$lib")" "$lib"
    done
  '';

  # Actual tests are slow and impure.
  # TODO try to run them anyway
  # TODO better test (files in tensorflow/tools/ci_build/builds/*test)
  checkPhase = ''
    ${python.interpreter} <<EOF
    # A simple "Hello world"
    import tensorflow as tf
    hello = tf.constant("Hello, world!")
    tf.print(hello)

    # Fit a simple model to random data
    import numpy as np
    np.random.seed(0)
    tf.random.set_seed(0)
    model = tf.keras.models.Sequential([
        tf.keras.layers.Dense(1, activation="linear")
    ])
    model.compile(optimizer="sgd", loss="mse")

    x = np.random.uniform(size=(1,1))
    y = np.random.uniform(size=(1,))
    model.fit(x, y, epochs=1)
    EOF
  '';
  # Regression test for #77626 removed because there is no more `tensorflow.contrib`.

  passthru = {
    # Expose the Bazel dependency fetch and the C library output.
    deps = bazel-build.deps;
    libtensorflow = bazel-build.out;
  };

  inherit (bazel-build) meta;
}