Merge pull request #256449 from illustris/hadoop

hadoop: 3.3.5 -> 3.3.6, build container executor from source

Authored by Peder Bergebakken Sundt, committed by GitHub
d2af1eb6 5883b305

+119 -65
+9 -9
nixos/modules/services/cluster/hadoop/default.nix
···
    mapredSiteDefault = mkOption {
      default = {
        "mapreduce.framework.name" = "yarn";
-       "yarn.app.mapreduce.am.env" = "HADOOP_MAPRED_HOME=${cfg.package}/lib/${cfg.package.untarDir}";
-       "mapreduce.map.env" = "HADOOP_MAPRED_HOME=${cfg.package}/lib/${cfg.package.untarDir}";
-       "mapreduce.reduce.env" = "HADOOP_MAPRED_HOME=${cfg.package}/lib/${cfg.package.untarDir}";
+       "yarn.app.mapreduce.am.env" = "HADOOP_MAPRED_HOME=${cfg.package}";
+       "mapreduce.map.env" = "HADOOP_MAPRED_HOME=${cfg.package}";
+       "mapreduce.reduce.env" = "HADOOP_MAPRED_HOME=${cfg.package}";
      };
      defaultText = literalExpression ''
        {
          "mapreduce.framework.name" = "yarn";
-         "yarn.app.mapreduce.am.env" = "HADOOP_MAPRED_HOME=''${config.${opt.package}}/lib/''${config.${opt.package}.untarDir}";
-         "mapreduce.map.env" = "HADOOP_MAPRED_HOME=''${config.${opt.package}}/lib/''${config.${opt.package}.untarDir}";
-         "mapreduce.reduce.env" = "HADOOP_MAPRED_HOME=''${config.${opt.package}}/lib/''${config.${opt.package}.untarDir}";
+         "yarn.app.mapreduce.am.env" = "HADOOP_MAPRED_HOME=''${config.${opt.package}}";
+         "mapreduce.map.env" = "HADOOP_MAPRED_HOME=''${config.${opt.package}}";
+         "mapreduce.reduce.env" = "HADOOP_MAPRED_HOME=''${config.${opt.package}}";
        }
      '';
      type = types.attrsOf types.anything;
···
    };

    log4jProperties = mkOption {
-     default = "${cfg.package}/lib/${cfg.package.untarDir}/etc/hadoop/log4j.properties";
+     default = "${cfg.package}/etc/hadoop/log4j.properties";
      defaultText = literalExpression ''
-       "''${config.${opt.package}}/lib/''${config.${opt.package}.untarDir}/etc/hadoop/log4j.properties"
+       "''${config.${opt.package}}/etc/hadoop/log4j.properties"
      '';
      type = types.path;
      example = literalExpression ''
-       "''${pkgs.hadoop}/lib/''${pkgs.hadoop.untarDir}/etc/hadoop/log4j.properties";
+       "''${pkgs.hadoop}/etc/hadoop/log4j.properties";
      '';
      description = lib.mdDoc "log4j.properties file added to HADOOP_CONF_DIR";
    };
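With the package output flattened, the module defaults no longer need the lib/${untarDir} indirection. A minimal sketch of what the path change means for a user configuration, reusing the log4jProperties option from the hunk above (the explicit override shown here is illustrative, not required, since it matches the new default):

{ pkgs, ... }:
{
  # Old default: "${pkgs.hadoop}/lib/${pkgs.hadoop.untarDir}/etc/hadoop/log4j.properties"
  # New layout: the same file sits directly under the package output.
  services.hadoop.log4jProperties = "${pkgs.hadoop}/etc/hadoop/log4j.properties";
}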
+1 -1
nixos/modules/services/cluster/hadoop/yarn.nix
···
      umount /run/wrappers/yarn-nodemanager/cgroup/cpu || true
      rm -rf /run/wrappers/yarn-nodemanager/ || true
      mkdir -p /run/wrappers/yarn-nodemanager/{bin,etc/hadoop,cgroup/cpu}
-     cp ${cfg.package}/lib/${cfg.package.untarDir}/bin/container-executor /run/wrappers/yarn-nodemanager/bin/
+     cp ${cfg.package}/bin/container-executor /run/wrappers/yarn-nodemanager/bin/
      chgrp hadoop /run/wrappers/yarn-nodemanager/bin/container-executor
      chmod 6050 /run/wrappers/yarn-nodemanager/bin/container-executor
      cp ${hadoopConf}/container-executor.cfg /run/wrappers/yarn-nodemanager/etc/hadoop/
+1 -1
nixos/tests/hadoop/hadoop.nix
···
      assert "standby" in client.succeed("sudo -u yarn yarn rmadmin -getAllServiceState")
      client.succeed("sudo -u yarn yarn rmadmin -getAllServiceState | systemd-cat")

-     assert "Estimated value of Pi is" in client.succeed("HADOOP_USER_NAME=hdfs yarn jar $(readlink $(which yarn) | sed -r 's~bin/yarn~lib/hadoop-*/share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar~g') pi 2 10")
+     assert "Estimated value of Pi is" in client.succeed("HADOOP_USER_NAME=hdfs yarn jar $(readlink $(which yarn) | sed -r 's~bin/yarn~share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar~g') pi 2 10")
      assert "SUCCEEDED" in client.succeed("yarn application -list -appStates FINISHED")
    '';
  })
+37
pkgs/applications/networking/cluster/hadoop/containerExecutor.nix
+ { version, stdenv, fetchurl, lib, cmake, openssl, platformAttrs, ... }:
+
+ stdenv.mkDerivation (finalAttrs: {
+   pname = "hadoop-yarn-containerexecutor";
+   inherit version;
+
+   src = fetchurl {
+     url = "mirror://apache/hadoop/common/hadoop-${finalAttrs.version}/hadoop-${finalAttrs.version}-src.tar.gz";
+     hash = platformAttrs.${stdenv.system}.srcHash;
+   };
+   sourceRoot = "hadoop-${finalAttrs.version}-src/hadoop-yarn-project/hadoop-yarn/"
+     + "hadoop-yarn-server/hadoop-yarn-server-nodemanager/src";
+
+   nativeBuildInputs = [ cmake ];
+   buildInputs = [ openssl ];
+   cmakeFlags = [ "-DHADOOP_CONF_DIR=/run/wrappers/yarn-nodemanager/etc/hadoop" ];
+
+   installPhase = ''
+     mkdir $out
+     mv target/var/empty/local/bin $out/
+   '';
+
+   meta = with lib; {
+     homepage = "https://hadoop.apache.org/";
+     description = "Framework for distributed processing of large data sets across clusters of computers";
+     license = licenses.asl20;
+
+     longDescription = ''
+       The Hadoop YARN Container Executor is a native component responsible for managing the lifecycle of containers
+       on individual nodes in a Hadoop YARN cluster. It launches, monitors, and terminates containers, ensuring that
+       resources like CPU and memory are allocated according to the policies defined in the ResourceManager.
+     '';
+
+     maintainers = with maintainers; [ illustris ];
+     platforms = filter (strings.hasSuffix "linux") (attrNames platformAttrs);
+   };
+ })
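For reference, the executor could also be built standalone with callPackage, which supplies stdenv, fetchurl, lib, cmake, and openssl; only version and platformAttrs need to be passed in. This is a sketch only: the version and srcHash below are the hadoop_3_3 x86_64-linux values from later in this PR, and the file name standalone.nix is illustrative.

# standalone.nix - build with `nix-build standalone.nix` on x86_64-linux
with import <nixpkgs> { };

callPackage ./containerExecutor.nix {
  version = "3.3.6";
  platformAttrs.x86_64-linux.srcHash =
    "sha256-4OEsVhBNV9CJ+PN4FgCduUCVA9/el5yezSCZ6ko3+bU=";
}

The resulting $out/bin/container-executor is what the yarn.nix change above copies into /run/wrappers/yarn-nodemanager/bin/ and marks setuid/setgid.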
+70 -51
pkgs/applications/networking/cluster/hadoop/default.nix
···
, nixosTests
, sparkSupport ? true
, spark
+ , libtirpc
+ , callPackage
}:

with lib;
···
assert elem stdenv.system [ "x86_64-linux" "x86_64-darwin" "aarch64-linux" "aarch64-darwin" ];

let
-  common = { pname, platformAttrs, untarDir ? "${pname}-${version}", jdk, openssl ? null, nativeLibs ? [ ], libPatches ? "", tests }:
-    stdenv.mkDerivation rec {
-      inherit pname jdk libPatches untarDir openssl;
+  common = { pname, platformAttrs, jdk, tests }:
+    stdenv.mkDerivation (finalAttrs: {
+      inherit pname jdk;
      version = platformAttrs.${stdenv.system}.version or (throw "Unsupported system: ${stdenv.system}");
      src = fetchurl {
-       url = "mirror://apache/hadoop/common/hadoop-${version}/hadoop-${version}" + optionalString stdenv.isAarch64 "-aarch64" + ".tar.gz";
+       url = "mirror://apache/hadoop/common/hadoop-${finalAttrs.version}/hadoop-${finalAttrs.version}"
+         + optionalString stdenv.isAarch64 "-aarch64" + ".tar.gz";
        inherit (platformAttrs.${stdenv.system}) hash;
      };
      doCheck = true;

+     # Build the container executor binary from source
+     # InstallPhase is not lazily evaluating containerExecutor for some reason
+     containerExecutor = if stdenv.isLinux then (callPackage ./containerExecutor.nix {
+       inherit (finalAttrs) version;
+       inherit platformAttrs;
+     }) else "";
+
      nativeBuildInputs = [ makeWrapper ]
-       ++ optionals (stdenv.isLinux && (nativeLibs != [ ] || libPatches != "")) [ autoPatchelfHook ];
-     buildInputs = [ openssl ] ++ nativeLibs;
+       ++ optionals stdenv.isLinux [ autoPatchelfHook ];
+     buildInputs = optionals stdenv.isLinux [ stdenv.cc.cc.lib openssl protobuf zlib snappy libtirpc ];

      installPhase = ''
-       mkdir -p $out/{lib/${untarDir}/conf,bin,lib}
-       mv * $out/lib/${untarDir}
+       mkdir $out
+       mv * $out/
      '' + optionalString stdenv.isLinux ''
-       # All versions need container-executor, but some versions can't use autoPatchelf because of broken SSL versions
-       patchelf --set-interpreter ${glibc.out}/lib64/ld-linux-x86-64.so.2 $out/lib/${untarDir}/bin/container-executor
+       for n in $(find ${finalAttrs.containerExecutor}/bin -type f); do
+         ln -sf "$n" $out/bin
+       done
+
+       # these libraries are loaded at runtime by the JVM
+       ln -s ${getLib cyrus_sasl}/lib/libsasl2.so $out/lib/native/libsasl2.so.2
+       ln -s ${getLib openssl}/lib/libcrypto.so $out/lib/native/
+       ln -s ${getLib zlib}/lib/libz.so.1 $out/lib/native/
+       ln -s ${getLib zstd}/lib/libzstd.so.1 $out/lib/native/
+       ln -s ${getLib bzip2}/lib/libbz2.so.1 $out/lib/native/
+       ln -s ${getLib snappy}/lib/libsnappy.so.1 $out/lib/native/
+
+       # libjvm.so is in different paths for java 8 and 11
+       # libnativetask.so in hadoop 3 and libhdfs.so in hadoop 2 depend on it
+       find $out/lib/native/ -name 'libnativetask.so*' -o -name 'libhdfs.so*' | \
+         xargs -n1 patchelf --add-rpath $(dirname $(find ${finalAttrs.jdk.home} -name libjvm.so | head -n1))
+
+       # NixOS/nixpkgs#193370
+       # This workaround is needed to use protobuf 3.19
+       # hadoop 3.3+ depends on protobuf 3.18, 3.2 depends on 3.8
+       find $out/lib/native -name 'libhdfspp.so*' | \
+         xargs -r -n1 patchelf --replace-needed libprotobuf.so.${
+           if (versionAtLeast finalAttrs.version "3.3") then "18"
+           else "8"
+         } libprotobuf.so
+
+       patchelf --replace-needed libcrypto.so.1.1 libcrypto.so \
+         $out/lib/native/{libhdfs{pp,}.so*,examples/{pipes-sort,wordcount-nopipe,wordcount-part,wordcount-simple}}
+
      '' + ''
-       for n in $(find $out/lib/${untarDir}/bin -type f ! -name "*.*"); do
-         makeWrapper "$n" "$out/bin/$(basename $n)"\
-           --set-default JAVA_HOME ${jdk.home}\
-           --set-default HADOOP_HOME $out/lib/${untarDir}\
+       for n in $(find $out/bin -type f ! -name "*.*"); do
+         wrapProgram "$n"\
+           --set-default JAVA_HOME ${finalAttrs.jdk.home}\
+           --set-default HADOOP_HOME $out/\
            --run "test -d /etc/hadoop-conf && export HADOOP_CONF_DIR=\''${HADOOP_CONF_DIR-'/etc/hadoop-conf/'}"\
-           --set-default HADOOP_CONF_DIR $out/lib/${untarDir}/etc/hadoop/\
+           --set-default HADOOP_CONF_DIR $out/etc/hadoop/\
            --prefix PATH : "${makeBinPath [ bash coreutils which]}"\
-           --prefix JAVA_LIBRARY_PATH : "${makeLibraryPath buildInputs}"
+           --prefix JAVA_LIBRARY_PATH : "${makeLibraryPath finalAttrs.buildInputs}"
        done
-     '' + optionalString sparkSupport ''
+     '' + (optionalString sparkSupport ''
        # Add the spark shuffle service jar to YARN
-       cp ${spark.src}/yarn/spark-${spark.version}-yarn-shuffle.jar $out/lib/${untarDir}/share/hadoop/yarn/
-     '' + libPatches;
+       cp ${spark.src}/yarn/spark-${spark.version}-yarn-shuffle.jar $out/share/hadoop/yarn/
+     '');

      passthru = { inherit tests; };
···
        maintainers = with maintainers; [ illustris ];
        platforms = attrNames platformAttrs;
      } (attrByPath [ stdenv.system "meta" ] {} platformAttrs);
-   };
+   });
in
{
  # Different version of hadoop support different java runtime versions
···
  hadoop_3_3 = common rec {
    pname = "hadoop";
    platformAttrs = rec {
-     x86_64-linux = {
-       version = "3.3.5";
-       hash = "sha256-RG4FypL6I6YGF6ixeUbe3kcoGvFQQEFhfLfV9i50JSo=";
-     };
-     x86_64-darwin = x86_64-linux;
-     aarch64-linux = {
-       version = "3.3.5";
-       hash = "sha256-qcKjbE881isauWBxIv+NY0UFbYit704/Re8Kdl6x1LA=";
-     };
-     aarch64-darwin = aarch64-linux;
+     x86_64-linux = {
+       version = "3.3.6";
+       hash = "sha256-9RlQWcDUECrap//xf3sqhd+Qa8tuGZSHFjGfmXhkGgQ=";
+       srcHash = "sha256-4OEsVhBNV9CJ+PN4FgCduUCVA9/el5yezSCZ6ko3+bU=";
+     };
+     x86_64-darwin = x86_64-linux;
+     aarch64-linux = x86_64-linux // {
+       hash = "sha256-5Lv2uA72BJEva5v2yncyPe5gKNCNOPNsoHffVt6KXQ0=";
+     };
+     aarch64-darwin = aarch64-linux;
    };
-   untarDir = "${pname}-${platformAttrs.${stdenv.system}.version}";
    jdk = jdk11_headless;
-   inherit openssl;
    # TODO: Package and add Intel Storage Acceleration Library
-   nativeLibs = [ stdenv.cc.cc.lib protobuf zlib snappy ];
-   libPatches = ''
-     ln -s ${getLib cyrus_sasl}/lib/libsasl2.so $out/lib/${untarDir}/lib/native/libsasl2.so.2
-     ln -s ${getLib openssl}/lib/libcrypto.so $out/lib/${untarDir}/lib/native/
-     ln -s ${getLib zlib}/lib/libz.so.1 $out/lib/${untarDir}/lib/native/
-     ln -s ${getLib zstd}/lib/libzstd.so.1 $out/lib/${untarDir}/lib/native/
-     ln -s ${getLib bzip2}/lib/libbz2.so.1 $out/lib/${untarDir}/lib/native/
-   '' + optionalString stdenv.isLinux ''
-     # libjvm.so for Java >=11
-     patchelf --add-rpath ${jdk.home}/lib/server $out/lib/${untarDir}/lib/native/libnativetask.so.1.0.0
-     # Java 8 has libjvm.so at a different path
-     patchelf --add-rpath ${jdk.home}/jre/lib/amd64/server $out/lib/${untarDir}/lib/native/libnativetask.so.1.0.0
-     # NixOS/nixpkgs#193370
-     # This workaround is needed to use protobuf 3.19
-     patchelf --replace-needed libprotobuf.so.18 libprotobuf.so $out/lib/${untarDir}/lib/native/libhdfspp.so
-   '';
    tests = nixosTests.hadoop;
  };
- hadoop_3_2 = common rec {
+ hadoop_3_2 = common {
    pname = "hadoop";
    platformAttrs.x86_64-linux = {
      version = "3.2.4";
      hash = "sha256-qt2gpMr+NHuiVR+/zFRzRyRKG725/ZNBIM69z9J9wNw=";
+     srcHash = "sha256-F9nGD3mZZ1eJf3Ec3AJGE9YBcL/HiagskcdKQhCn/sw=";
    };
    jdk = jdk8_headless;
-   # not using native libs because of broken openssl_1_0_2 dependency
-   # can be manually overridden
    tests = nixosTests.hadoop_3_2;
  };
  hadoop2 = common rec {
···
    platformAttrs.x86_64-linux = {
      version = "2.10.2";
      hash = "sha256-xhA4zxqIRGNhIeBnJO9dLKf/gx/Bq+uIyyZwsIafEyo=";
+     srcHash = "sha256-ucxCyXiJo8aL6aNMhZgKEbn8sGKOoMPVREbMGSfSdAI=";
    };
    jdk = jdk8_headless;
    tests = nixosTests.hadoop2;
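Because the untarDir indirection is gone, downstream expressions can address files directly under the package output, as the updated NixOS test above does for the examples jar. A minimal sketch under that assumption; the hadoop-pi-example name is hypothetical, and the jar path assumes the standard hadoop-mapreduce-examples-<version>.jar file name from the release tarball:

{ hadoop, writeShellScriptBin }:

# Illustrative wrapper that runs the MapReduce pi example against the new layout.
writeShellScriptBin "hadoop-pi-example" ''
  exec ${hadoop}/bin/yarn jar \
    ${hadoop}/share/hadoop/mapreduce/hadoop-mapreduce-examples-${hadoop.version}.jar pi 2 10
''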
+1 -3
pkgs/top-level/all-packages.nix
···

  groovy = callPackage ../development/interpreters/groovy { };

- inherit (callPackages ../applications/networking/cluster/hadoop {
-   openssl = openssl_1_1;
- })
+ inherit (callPackages ../applications/networking/cluster/hadoop {})
    hadoop_3_3
    hadoop_3_2
    hadoop2;