{
  lib,
  stdenv,
  fetchurl,
  makeWrapper,
  autoPatchelfHook,
  jdk8_headless,
  jdk11_headless,
  bash,
  coreutils,
  which,
  bzip2,
  cyrus_sasl,
  protobuf,
  snappy,
  zlib,
  zstd,
  openssl,
  nixosTests,
  sparkSupport ? true,
  spark,
  libtirpc,
  callPackage,
}:
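
# How this set is typically consumed (a sketch; the consumer-side names are
# hypothetical):
#   hadoopPackages = callPackage ./default.nix { };
#   inherit (hadoopPackages) hadoop_3_4 hadoop_3_3 hadoop2;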

assert lib.elem stdenv.system [
  "x86_64-linux"
  "x86_64-darwin"
  "aarch64-linux"
  "aarch64-darwin"
];

let
  common =
    {
      pname,
      platformAttrs,
      jdk,
      tests,
    }:
    stdenv.mkDerivation (finalAttrs: {
      inherit pname jdk;
      version = platformAttrs.${stdenv.system}.version or (throw "Unsupported system: ${stdenv.system}");
      src = fetchurl {
        url =
          "mirror://apache/hadoop/common/hadoop-${finalAttrs.version}/hadoop-${finalAttrs.version}"
          + lib.optionalString stdenv.hostPlatform.isAarch64 "-aarch64"
          + ".tar.gz";
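        # For example, hadoop 3.4.1 on x86_64-linux resolves to
        # mirror://apache/hadoop/common/hadoop-3.4.1/hadoop-3.4.1.tar.gz;
        # aarch64 systems fetch the "-aarch64" tarball instead.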
        inherit (platformAttrs.${stdenv.system} or (throw "Unsupported system: ${stdenv.system}"))
          hash
          ;
      };
      doCheck = true;

      # Build the container executor binary from source.
      # installPhase does not lazily evaluate containerExecutor for some reason,
      # so the non-Linux case must be handled here with an empty string.
      containerExecutor =
        if stdenv.hostPlatform.isLinux then
          (callPackage ./containerExecutor.nix {
            inherit (finalAttrs) version;
            inherit platformAttrs;
          })
        else
          "";

      nativeBuildInputs = [
        makeWrapper
      ]
      ++ lib.optionals stdenv.hostPlatform.isLinux [ autoPatchelfHook ];
      buildInputs = lib.optionals stdenv.hostPlatform.isLinux [
        (lib.getLib stdenv.cc.cc)
        openssl
        protobuf
        zlib
        snappy
        libtirpc
      ];
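      # On Linux, autoPatchelfHook rewrites the RPATHs of the prebuilt native
      # binaries so they resolve against the libraries in buildInputs above.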

      installPhase = ''
        mkdir $out
        mv * $out/
      ''
      + lib.optionalString stdenv.hostPlatform.isLinux ''
        for n in $(find ${finalAttrs.containerExecutor}/bin -type f); do
          ln -sf "$n" $out/bin
        done

        # these libraries are loaded at runtime by the JVM
        ln -s ${lib.getLib cyrus_sasl}/lib/libsasl2.so $out/lib/native/libsasl2.so.2
        ln -s ${lib.getLib openssl}/lib/libcrypto.so $out/lib/native/
        ln -s ${lib.getLib zlib}/lib/libz.so.1 $out/lib/native/
        ln -s ${lib.getLib zstd}/lib/libzstd.so.1 $out/lib/native/
        ln -s ${lib.getLib bzip2}/lib/libbz2.so.1 $out/lib/native/
        ln -s ${lib.getLib snappy}/lib/libsnappy.so.1 $out/lib/native/

        # libjvm.so is in different paths for Java 8 and 11;
        # libnativetask.so in hadoop 3 and libhdfs.so in hadoop 2 depend on it
        find $out/lib/native/ -name 'libnativetask.so*' -o -name 'libhdfs.so*' | \
          xargs -n1 patchelf --add-rpath $(dirname $(find ${finalAttrs.jdk.home} -name libjvm.so | head -n1))
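
        # Sanity check (a sketch): inspect the added RPATH entry with
        #   patchelf --print-rpath $out/lib/native/libnativetask.so.*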

        # NixOS/nixpkgs#193370
        # This workaround is needed to use protobuf 3.19:
        # hadoop 3.3+ depends on protobuf 3.18, hadoop 3.2 depends on 3.8
        find $out/lib/native -name 'libhdfspp.so*' | \
          xargs -r -n1 patchelf --replace-needed libprotobuf.so.${
            if (lib.versionAtLeast finalAttrs.version "3.4.1") then
              "32"
            else if (lib.versionAtLeast finalAttrs.version "3.3") then
              "18"
            else
              "8"
          } libprotobuf.so
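        # e.g. on hadoop 3.4.1 this rewrites the DT_NEEDED entry
        # libprotobuf.so.32 to the unversioned libprotobuf.so, which then
        # resolves against the protobuf package from buildInputs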

        patchelf --replace-needed libcrypto.so.1.1 libcrypto.so \
          $out/lib/native/{libhdfs{pp,}.so*,examples/{pipes-sort,wordcount-nopipe,wordcount-part,wordcount-simple}}
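        # Verify with, e.g.:
        #   patchelf --print-needed $out/lib/native/libhdfs.so*
        # which should now list libcrypto.so instead of libcrypto.so.1.1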

      ''
      + ''
        for n in $(find $out/bin -type f ! -name "*.*"); do
          wrapProgram "$n"\
            --set-default JAVA_HOME ${finalAttrs.jdk.home}\
            --set-default HADOOP_HOME $out/\
            --run "test -d /etc/hadoop-conf && export HADOOP_CONF_DIR=\''${HADOOP_CONF_DIR-'/etc/hadoop-conf/'}"\
            --set-default HADOOP_CONF_DIR $out/etc/hadoop/\
            --prefix PATH : "${
              lib.makeBinPath [
                bash
                coreutils
                which
              ]
            }"\
            --prefix JAVA_LIBRARY_PATH : "${lib.makeLibraryPath finalAttrs.buildInputs}"
        done
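
        # Resulting HADOOP_CONF_DIR precedence: a value set by the caller wins,
        # then /etc/hadoop-conf (when that directory exists), then the bundled
        # $out/etc/hadoop default.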
      ''
      + (lib.optionalString sparkSupport ''
        # Add the spark shuffle service jar to YARN
        cp ${spark.src}/yarn/spark-${spark.version}-yarn-shuffle.jar $out/share/hadoop/yarn/
      '');
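        # To actually serve it, NodeManagers must enable the auxiliary service,
        # roughly (see the Spark on YARN docs):
        #   yarn.nodemanager.aux-services = spark_shuffle
        #   yarn.nodemanager.aux-services.spark_shuffle.class = org.apache.spark.network.yarn.YarnShuffleService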

      passthru = { inherit tests; };

      meta =
        with lib;
        recursiveUpdate {
          homepage = "https://hadoop.apache.org/";
          description = "Framework for distributed processing of large data sets across clusters of computers";
          license = licenses.asl20;
          sourceProvenance = with sourceTypes; [ binaryBytecode ];

          longDescription = ''
            The Apache Hadoop software library is a framework that allows for
            the distributed processing of large data sets across clusters of
            computers using a simple programming model. It is designed to
            scale up from single servers to thousands of machines, each
            offering local computation and storage. Rather than rely on
            hardware to deliver high availability, the library itself is
            designed to detect and handle failures at the application layer,
            thus delivering a highly available service on top of a cluster of
            computers, each of which may be prone to failures.
          '';
          maintainers = with maintainers; [ illustris ];
          platforms = attrNames platformAttrs;
        } (attrByPath [ stdenv.system "meta" ] { } platformAttrs);
    });
in
{
  # Different versions of Hadoop support different Java runtime versions:
  # https://cwiki.apache.org/confluence/display/HADOOP/Hadoop+Java+Versions
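  # Consumers typically pick one of the attributes below, e.g. in a NixOS
  # configuration (a sketch; the option name follows the hadoop module):
  #   services.hadoop.package = pkgs.hadoop_3_4;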
  hadoop_3_4 = common {
    pname = "hadoop";
    platformAttrs = rec {
      x86_64-linux = {
        version = "3.4.1";
        hash = "sha256-mtVIeDOZbf5VFOdW9DkQKckFKf0i6NAC/T3QwUwEukY=";
        srcHash = "sha256-lE9uSohy6GWXprFEYbEin2ITqTms2h6EWXe4nEd3U4Y=";
      };
      x86_64-darwin = x86_64-linux;
      aarch64-linux = x86_64-linux // {
        version = "3.4.0";
        hash = "sha256-QWxzKtNyw/AzcHMv0v7kj91pw1HO7VAN9MHO84caFk8=";
        srcHash = "sha256-viDF3LdRCZHqFycOYfN7nUQBPHiMCIjmu7jgIAaaK9E=";
      };
      aarch64-darwin = aarch64-linux;
    };
    jdk = jdk11_headless;
    # TODO: Package and add Intel Storage Acceleration Library
    tests = nixosTests.hadoop;
  };
  hadoop_3_3 = common {
    pname = "hadoop";
    platformAttrs = rec {
      x86_64-linux = {
        version = "3.3.6";
        hash = "sha256-9RlQWcDUECrap//xf3sqhd+Qa8tuGZSHFjGfmXhkGgQ=";
        srcHash = "sha256-4OEsVhBNV9CJ+PN4FgCduUCVA9/el5yezSCZ6ko3+bU=";
      };
      x86_64-darwin = x86_64-linux;
      aarch64-linux = x86_64-linux // {
        hash = "sha256-5Lv2uA72BJEva5v2yncyPe5gKNCNOPNsoHffVt6KXQ0=";
      };
      aarch64-darwin = aarch64-linux;
    };
    jdk = jdk11_headless;
    # TODO: Package and add Intel Storage Acceleration Library
    tests = nixosTests.hadoop_3_3;
  };
  hadoop2 = common {
    pname = "hadoop";
    platformAttrs.x86_64-linux = {
      version = "2.10.2";
      hash = "sha256-xhA4zxqIRGNhIeBnJO9dLKf/gx/Bq+uIyyZwsIafEyo=";
      srcHash = "sha256-ucxCyXiJo8aL6aNMhZgKEbn8sGKOoMPVREbMGSfSdAI=";
    };
    jdk = jdk8_headless;
    tests = nixosTests.hadoop2;
  };
}