Merge pull request #32893 from volth/hadoop_2_7-3_0

hadoop: 2.2.0 -> 2.7.6, 2.8.4, 2.9.1, 3.0.3, 3.1.0

authored by Matthew Bauer, committed by GitHub (82b474f3, dfd90df3)

+134 -55 (2 files changed)

pkgs/applications/networking/cluster/hadoop/default.nix  +133 -54
@@ -1,76 +1,155 @@
-{ stdenv, fetchurl, makeWrapper, which, jre, bash }:
+{ stdenv, fetchurl, makeWrapper, pkgconfig, which, maven, cmake, jre, bash, coreutils, glibc, protobuf2_5, fuse, snappy, zlib, bzip2, openssl }:
 
 let
-  hadoopDerivation = { version, sha256 }: stdenv.mkDerivation rec {
+  common = { version, sha256, dependencies-sha256, tomcat }:
+    let
+      # compile the hadoop tarball from sources; it requires some patches
+      binary-distribution = stdenv.mkDerivation rec {
+        name = "hadoop-${version}-bin";
+        src = fetchurl {
+          url = "mirror://apache/hadoop/common/hadoop-${version}/hadoop-${version}-src.tar.gz";
+          inherit sha256;
+        };
 
-    name = "hadoop-${version}";
+        # perform a fake build to make a fixed-output derivation of the dependencies downloaded from maven central (~100Mb in ~3000 files)
+        fetched-maven-deps = stdenv.mkDerivation {
+          name = "hadoop-${version}-maven-deps";
+          inherit src nativeBuildInputs buildInputs configurePhase;
+          buildPhase = ''
+            while mvn package -Dmaven.repo.local=$out/.m2 ${mavenFlags} -Dmaven.wagon.rto=5000; [ $? = 1 ]; do
+              echo "timeout, restart maven to continue downloading"
+            done
+          '';
+          # keep only *.{pom,jar,xml,sha1,so,dll,dylib} and delete all ephemeral files that contain lastModified timestamps
+          installPhase = ''find $out/.m2 -type f -regex '.+\(\.lastUpdated\|resolver-status\.properties\|_remote\.repositories\)' -delete'';
+          outputHashAlgo = "sha256";
+          outputHashMode = "recursive";
+          outputHash = dependencies-sha256;
+        };
 
-    src = fetchurl {
-      url = "mirror://apache/hadoop/common/${name}/${name}.tar.gz";
-      sha256 = "${sha256}";
-    };
+        nativeBuildInputs = [ maven cmake pkgconfig ];
+        buildInputs = [ fuse snappy zlib bzip2 openssl protobuf2_5 ];
+        # most of the hardcoded paths are fixed in 2.9.x and 3.0.0; this list of patched files can be shortened once 2.7.x and 2.8.x are deprecated
+        postPatch = ''
+          for file in hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HardLink.java \
+                      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Shell.java \
+                      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java \
+                      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DockerContainerExecutor.java \
+                      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java \
+                      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java; do
+            if [ -f "$file" ]; then
+              substituteInPlace "$file" \
+                --replace '/usr/bin/stat' 'stat' \
+                --replace '/bin/bash' 'bash' \
+                --replace '/bin/ls' 'ls' \
+                --replace '/bin/mv' 'mv'
+            fi
+          done
+        '';
+        configurePhase = "true"; # do not trigger the cmake hook
+        mavenFlags = "-Drequire.snappy -Drequire.bzip2 -DskipTests -Pdist,native -e";
+        # prevent downloading tomcat during the build
+        preBuild = stdenv.lib.optionalString (tomcat != null) ''
+          install -D ${tomcat.src} hadoop-hdfs-project/hadoop-hdfs-httpfs/downloads/apache-tomcat-${tomcat.version}.tar.gz
+          install -D ${tomcat.src} hadoop-common-project/hadoop-kms/downloads/apache-tomcat-${tomcat.version}.tar.gz
+        '';
+        buildPhase = ''
+          # 'maven.repo.local' must be writable
+          mvn package --offline -Dmaven.repo.local=$(cp -dpR ${fetched-maven-deps}/.m2 ./ && chmod +w -R .m2 && pwd)/.m2 ${mavenFlags}
+          # remove the runtime dependency on $jdk/jre/lib/amd64/server/libjvm.so
+          patchelf --set-rpath ${stdenv.lib.makeLibraryPath [glibc]} hadoop-dist/target/hadoop-${version}/lib/native/libhadoop.so.1.0.0
+          patchelf --set-rpath ${stdenv.lib.makeLibraryPath [glibc]} hadoop-dist/target/hadoop-${version}/lib/native/libhdfs.so.0.0.0
+        '';
+        installPhase = "mv hadoop-dist/target/hadoop-${version} $out";
+      };
+    in
+    stdenv.mkDerivation rec {
+      name = "hadoop-${version}";
 
-    buildInputs = [ makeWrapper ];
+      src = binary-distribution;
 
-    buildPhase = ''
-      for n in bin/{hadoop,hdfs,mapred,yarn} sbin/*.sh; do
-        sed -i $n -e "s|#!/usr/bin/env bash|#! ${bash}/bin/bash|"
-      done
-    '' + stdenv.lib.optionalString (!stdenv.isDarwin) ''
-      patchelf --set-interpreter "$(cat $NIX_CC/nix-support/dynamic-linker)" bin/container-executor;
-      patchelf --set-interpreter "$(cat $NIX_CC/nix-support/dynamic-linker)" bin/test-container-executor;
-    '';
+      nativeBuildInputs = [ makeWrapper ];
 
-    installPhase = ''
-      mkdir -p $out
-      mv *.txt share/doc/hadoop/
-      mv * $out
+      installPhase = ''
+        mkdir -p $out/share/doc/hadoop
+        cp -dpR * $out/
+        mv $out/*.txt $out/share/doc/hadoop/
 
-      for n in $out/bin/{hadoop,hdfs,mapred,yarn} $out/sbin/*.sh; do
-        wrapProgram $n --prefix PATH : "${stdenv.lib.makeBinPath [ which jre bash ]}" --set JAVA_HOME "${jre}" --set HADOOP_HOME "$out"
-      done
-    '';
+        #
+        # Do not use `wrapProgram` here; script renaming may result in weird things: http://i.imgur.com/0Xee013.png
+        #
+        mkdir -p $out/bin.wrapped
+        for n in $out/bin/*; do
+          if [ -f "$n" ]; then # only regular files
+            mv $n $out/bin.wrapped/
+            makeWrapper $out/bin.wrapped/$(basename $n) $n \
+              --prefix PATH : "${stdenv.lib.makeBinPath [ which jre bash coreutils ]}" \
+              --prefix JAVA_LIBRARY_PATH : "${stdenv.lib.makeLibraryPath [ openssl snappy zlib bzip2 ]}" \
+              --set JAVA_HOME "${jre}" \
+              --set HADOOP_PREFIX "$out"
+          fi
+        done
+      '';
 
-    meta = {
-      homepage = http://hadoop.apache.org/;
-      description = "Framework for distributed processing of large data sets across clusters of computers";
-      license = stdenv.lib.licenses.asl20;
+      meta = with stdenv.lib; {
+        homepage = "http://hadoop.apache.org/";
+        description = "Framework for distributed processing of large data sets across clusters of computers";
+        license = licenses.asl20;
 
-      longDescription = ''
-        The Apache Hadoop software library is a framework that allows for
-        the distributed processing of large data sets across clusters of
-        computers using a simple programming model. It is designed to
-        scale up from single servers to thousands of machines, each
-        offering local computation and storage. Rather than rely on
-        hardware to deliver high-avaiability, the library itself is
-        designed to detect and handle failures at the application layer,
-        so delivering a highly-availabile service on top of a cluster of
-        computers, each of which may be prone to failures.
-      '';
+        longDescription = ''
+          The Apache Hadoop software library is a framework that allows for
+          the distributed processing of large data sets across clusters of
+          computers using a simple programming model. It is designed to
+          scale up from single servers to thousands of machines, each
+          offering local computation and storage. Rather than rely on
+          hardware to deliver high availability, the library itself is
+          designed to detect and handle failures at the application layer,
+          delivering a highly available service on top of a cluster of
+          computers, each of which may be prone to failures.
+        '';
+        maintainers = with maintainers; [ volth ];
+        platforms = [ "x86_64-linux" ];
+      };
+    };
 
-    platforms = stdenv.lib.platforms.linux;
+  tomcat_6_0_48 = rec {
+    version = "6.0.48";
+    src = fetchurl {
+      # do not use "mirror://apache/" here; tomcat-6 is legacy and has been removed from the mirrors
+      url = "https://archive.apache.org/dist/tomcat/tomcat-6/v${version}/bin/apache-tomcat-${version}.tar.gz";
+      sha256 = "1w4jf28g8p25fmijixw6b02iqlagy2rvr57y3n90hvz341kb0bbc";
     };
   };
-in
-{
-  hadoop_2_7 = hadoopDerivation {
+
+in {
+  hadoop_2_7 = common {
     version = "2.7.6";
-    sha256 = "0sanwam0k2m40pfsf9l5zxvklv8rvq78xvhd2pbsbiab7ylpwcpj";
+    sha256 = "0wmg0iy0qxrf43fzajzmx03gxp4yx197vxacqwkxaj45clqwl010";
+    dependencies-sha256 = "1lsr9nvrynzspxqcamb10d596zlnmnfpxhkd884gdiva0frm0b1r";
+    tomcat = tomcat_6_0_48;
   };
-  hadoop_2_8 = hadoopDerivation {
+  hadoop_2_8 = common {
     version = "2.8.4";
-    sha256 = "05dik4qnazhf5aldwkljf610cwncsg5y3hyvgj476cfpzmr5jm3b";
+    sha256 = "16c3ljhrzibkjn3y1bmjxdgf0kn60l23ay5hqpp7vpbnqx52x68w";
+    dependencies-sha256 = "1j4f461487fydgr5978nnm245ksv4xbvskfr8pbmfhcyss6b7w03";
+    tomcat = tomcat_6_0_48;
   };
-  hadoop_2_9 = hadoopDerivation {
+  hadoop_2_9 = common {
     version = "2.9.1";
-    sha256 = "1z22v46mmq9hfjc229x61ws332sa1rvmib3v4jsd6i1n29d03mpf";
+    sha256 = "0qgmpfbpv7f521fkjy5ldzdb4lwiblhs0hyl8qy041ws17y5x7d7";
+    dependencies-sha256 = "1d5i8jj5y746rrqb9lscycnd7acmxlkz64ydsiyqsh5cdqgy2x7x";
+    tomcat = tomcat_6_0_48;
   };
-  hadoop_3_0 = hadoopDerivation {
-    version = "3.0.2";
-    sha256 = "10ig3rrcaizvs5bnni15fvm942mr5hfc2hr355g6ich722kpll0d";
+  hadoop_3_0 = common {
+    version = "3.0.3";
+    sha256 = "1vvkci0kx4b48dg0niifn2d3r4wwq8pb3c5z20wy8pqsqrqhlci5";
+    dependencies-sha256 = "1kzkna9ywacm2m1cirj9cyip66bgqjhid2xf9rrhq6g10lhr8j9m";
+    tomcat = null;
   };
-  hadoop_3_1 = hadoopDerivation {
+  hadoop_3_1 = common {
     version = "3.1.0";
-    sha256 = "1rs3a752is1y2vgxjlqmmln00iwzncwlwg59l6gjv92zb7njq3b7";
+    sha256 = "0lig25jkffkzc2bfgyrnm3wymapgyw9fkai8sk9fnmp7cljia314";
+    dependencies-sha256 = "1ri6a7lrijh538vy7v0fzgvkw603pf8jkh3ldl1kl7l0dvszd70d";
+    tomcat = null;
   };
 }
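
Review note: the heart of the new expression is the fetched-maven-deps fixed-output derivation. A sandboxed Nix build cannot let Maven download its ~3000 artifacts at build time, so the dependencies are fetched once by a throwaway build whose output is pinned by dependencies-sha256; Maven's bookkeeping files are deleted first because their embedded lastModified timestamps would make the hash unstable. The same pattern transfers to other Maven packages. A minimal sketch, assuming the caller supplies src and a precomputed depsSha256 (these names are illustrative, not taken from this PR):

{ stdenv, maven, src, depsSha256 }:

stdenv.mkDerivation {
  name = "example-maven-deps";
  inherit src;
  nativeBuildInputs = [ maven ];

  # Run the real build, but point Maven's local repository at $out
  # so everything it downloads lands in the derivation output.
  buildPhase = ''
    mvn package -Dmaven.repo.local=$out/.m2 -DskipTests
  '';

  # Delete Maven's bookkeeping files: they embed lastModified timestamps
  # and would make the fixed output differ from run to run.
  installPhase = ''
    find $out/.m2 -type f -regex \
      '.+\(\.lastUpdated\|resolver-status\.properties\|_remote\.repositories\)' -delete
  '';

  # Fixed-output derivation: network access is permitted because the
  # result is verified against this hash.
  outputHashAlgo = "sha256";
  outputHashMode = "recursive";
  outputHash = depsSha256;
}

A consumer then copies the pinned .m2 tree somewhere writable and builds with mvn --offline, which is exactly what the buildPhase in the diff above does; any change in the dependency set surfaces as a hash mismatch rather than a silent re-download.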
pkgs/top-level/all-packages.nix  +1 -1

@@ -7220,7 +7220,7 @@
 
   guile = guile_2_2;
 
-  inherit (callPackage ../applications/networking/cluster/hadoop { })
+  inherit (callPackages ../applications/networking/cluster/hadoop { })
     hadoop_2_7
     hadoop_2_8
     hadoop_2_9
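
Review note: the switch from callPackage to callPackages is needed because hadoop/default.nix now evaluates to an attribute set of five derivations rather than a single derivation; callPackages applies the dependency-injection and override machinery to each member of the returned set, so every hadoop_* attribute keeps a working .override. A usage sketch (swapping in jre8 is an illustrative example, not something done in this PR):

with import <nixpkgs> { };

# Each hadoop_* attribute is individually overridable under callPackages:
hadoop_3_1.override { jre = jre8; }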