{
  lib,
  stdenv,
  fetchurl,
  makeWrapper,
  autoPatchelfHook,
  jdk8_headless,
  jdk11_headless,
  bash,
  coreutils,
  which,
  bzip2,
  cyrus_sasl,
  protobuf,
  snappy,
  zlib,
  zstd,
  openssl,
  nixosTests,
  sparkSupport ? true,
  spark,
  libtirpc,
  callPackage,
}:
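
# How this set is typically consumed (a sketch; the consumer-side names are
# hypothetical):
#   hadoopPackages = callPackage ./default.nix { };
#   inherit (hadoopPackages) hadoop_3_4 hadoop_3_3 hadoop2;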

assert lib.elem stdenv.system [
  "x86_64-linux"
  "x86_64-darwin"
  "aarch64-linux"
  "aarch64-darwin"
];

let
  common =
    {
      pname,
      platformAttrs,
      jdk,
      tests,
    }:
    stdenv.mkDerivation (finalAttrs: {
      inherit pname jdk;
      version = platformAttrs.${stdenv.system}.version or (throw "Unsupported system: ${stdenv.system}");
      src = fetchurl {
        url =
          "mirror://apache/hadoop/common/hadoop-${finalAttrs.version}/hadoop-${finalAttrs.version}"
          + lib.optionalString stdenv.hostPlatform.isAarch64 "-aarch64"
          + ".tar.gz";
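        # For example, hadoop 3.4.1 on x86_64-linux resolves to
        # mirror://apache/hadoop/common/hadoop-3.4.1/hadoop-3.4.1.tar.gz;
        # aarch64 systems fetch the "-aarch64" tarball instead.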
        inherit (platformAttrs.${stdenv.system} or (throw "Unsupported system: ${stdenv.system}"))
          hash
          ;
      };
      doCheck = true;

      # Build the container executor binary from source.
      # installPhase does not lazily evaluate containerExecutor for some reason,
      # so the non-Linux case must be handled here with an empty string.
      containerExecutor =
        if stdenv.hostPlatform.isLinux then
          (callPackage ./containerExecutor.nix {
            inherit (finalAttrs) version;
            inherit platformAttrs;
          })
        else
          "";

      nativeBuildInputs = [
        makeWrapper
      ]
      ++ lib.optionals stdenv.hostPlatform.isLinux [ autoPatchelfHook ];
      buildInputs = lib.optionals stdenv.hostPlatform.isLinux [
        (lib.getLib stdenv.cc.cc)
        openssl
        protobuf
        zlib
        snappy
        libtirpc
      ];
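      # On Linux, autoPatchelfHook rewrites the RPATHs of the prebuilt native
      # binaries so they resolve against the libraries in buildInputs above.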

      installPhase = ''
        mkdir $out
        mv * $out/
      ''
      + lib.optionalString stdenv.hostPlatform.isLinux ''
        for n in $(find ${finalAttrs.containerExecutor}/bin -type f); do
          ln -sf "$n" $out/bin
        done

        # these libraries are loaded at runtime by the JVM
        ln -s ${lib.getLib cyrus_sasl}/lib/libsasl2.so $out/lib/native/libsasl2.so.2
        ln -s ${lib.getLib openssl}/lib/libcrypto.so $out/lib/native/
        ln -s ${lib.getLib zlib}/lib/libz.so.1 $out/lib/native/
        ln -s ${lib.getLib zstd}/lib/libzstd.so.1 $out/lib/native/
        ln -s ${lib.getLib bzip2}/lib/libbz2.so.1 $out/lib/native/
        ln -s ${lib.getLib snappy}/lib/libsnappy.so.1 $out/lib/native/

        # libjvm.so is in different paths for Java 8 and 11;
        # libnativetask.so in hadoop 3 and libhdfs.so in hadoop 2 depend on it
        find $out/lib/native/ -name 'libnativetask.so*' -o -name 'libhdfs.so*' | \
          xargs -n1 patchelf --add-rpath $(dirname $(find ${finalAttrs.jdk.home} -name libjvm.so | head -n1))
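
        # Sanity check (a sketch): inspect the added RPATH entry with
        #   patchelf --print-rpath $out/lib/native/libnativetask.so.*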

        # NixOS/nixpkgs#193370
        # This workaround is needed to use protobuf 3.19:
        # hadoop 3.3+ depends on protobuf 3.18, hadoop 3.2 depends on 3.8
        find $out/lib/native -name 'libhdfspp.so*' | \
          xargs -r -n1 patchelf --replace-needed libprotobuf.so.${
            if (lib.versionAtLeast finalAttrs.version "3.4.1") then
              "32"
            else if (lib.versionAtLeast finalAttrs.version "3.3") then
              "18"
            else
              "8"
          } libprotobuf.so
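        # e.g. on hadoop 3.4.1 this rewrites the DT_NEEDED entry
        # libprotobuf.so.32 to the unversioned libprotobuf.so, which then
        # resolves against the protobuf package from buildInputs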

        patchelf --replace-needed libcrypto.so.1.1 libcrypto.so \
          $out/lib/native/{libhdfs{pp,}.so*,examples/{pipes-sort,wordcount-nopipe,wordcount-part,wordcount-simple}}
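        # Verify with, e.g.:
        #   patchelf --print-needed $out/lib/native/libhdfs.so*
        # which should now list libcrypto.so instead of libcrypto.so.1.1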

      ''
      + ''
        for n in $(find $out/bin -type f ! -name "*.*"); do
          wrapProgram "$n"\
            --set-default JAVA_HOME ${finalAttrs.jdk.home}\
            --set-default HADOOP_HOME $out/\
            --run "test -d /etc/hadoop-conf && export HADOOP_CONF_DIR=\''${HADOOP_CONF_DIR-'/etc/hadoop-conf/'}"\
            --set-default HADOOP_CONF_DIR $out/etc/hadoop/\
            --prefix PATH : "${
              lib.makeBinPath [
                bash
                coreutils
                which
              ]
            }"\
            --prefix JAVA_LIBRARY_PATH : "${lib.makeLibraryPath finalAttrs.buildInputs}"
        done
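
        # Resulting HADOOP_CONF_DIR precedence: a value set by the caller wins,
        # then /etc/hadoop-conf (when that directory exists), then the bundled
        # $out/etc/hadoop default.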
      ''
      + (lib.optionalString sparkSupport ''
        # Add the spark shuffle service jar to YARN
        cp ${spark.src}/yarn/spark-${spark.version}-yarn-shuffle.jar $out/share/hadoop/yarn/
      '');
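        # To actually serve it, NodeManagers must enable the auxiliary service,
        # roughly (see the Spark on YARN docs):
        #   yarn.nodemanager.aux-services = spark_shuffle
        #   yarn.nodemanager.aux-services.spark_shuffle.class = org.apache.spark.network.yarn.YarnShuffleService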

      passthru = { inherit tests; };

      meta =
        with lib;
        recursiveUpdate {
          homepage = "https://hadoop.apache.org/";
          description = "Framework for distributed processing of large data sets across clusters of computers";
          license = licenses.asl20;
          sourceProvenance = with sourceTypes; [ binaryBytecode ];

          longDescription = ''
            The Apache Hadoop software library is a framework that allows for
            the distributed processing of large data sets across clusters of
            computers using a simple programming model. It is designed to
            scale up from single servers to thousands of machines, each
            offering local computation and storage. Rather than rely on
            hardware to deliver high availability, the library itself is
            designed to detect and handle failures at the application layer,
            thus delivering a highly available service on top of a cluster of
            computers, each of which may be prone to failures.
          '';
          maintainers = with maintainers; [ illustris ];
          platforms = attrNames platformAttrs;
        } (attrByPath [ stdenv.system "meta" ] { } platformAttrs);
    });
in
{
  # Different versions of Hadoop support different Java runtime versions:
  # https://cwiki.apache.org/confluence/display/HADOOP/Hadoop+Java+Versions
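  # Consumers typically pick one of the attributes below, e.g. in a NixOS
  # configuration (a sketch; the option name follows the hadoop module):
  #   services.hadoop.package = pkgs.hadoop_3_4;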
  hadoop_3_4 = common {
    pname = "hadoop";
    platformAttrs = rec {
      x86_64-linux = {
        version = "3.4.1";
        hash = "sha256-mtVIeDOZbf5VFOdW9DkQKckFKf0i6NAC/T3QwUwEukY=";
        srcHash = "sha256-lE9uSohy6GWXprFEYbEin2ITqTms2h6EWXe4nEd3U4Y=";
      };
      x86_64-darwin = x86_64-linux;
      aarch64-linux = x86_64-linux // {
        version = "3.4.0";
        hash = "sha256-QWxzKtNyw/AzcHMv0v7kj91pw1HO7VAN9MHO84caFk8=";
        srcHash = "sha256-viDF3LdRCZHqFycOYfN7nUQBPHiMCIjmu7jgIAaaK9E=";
      };
      aarch64-darwin = aarch64-linux;
    };
    jdk = jdk11_headless;
    # TODO: Package and add Intel Storage Acceleration Library
    tests = nixosTests.hadoop;
  };
  hadoop_3_3 = common {
    pname = "hadoop";
    platformAttrs = rec {
      x86_64-linux = {
        version = "3.3.6";
        hash = "sha256-9RlQWcDUECrap//xf3sqhd+Qa8tuGZSHFjGfmXhkGgQ=";
        srcHash = "sha256-4OEsVhBNV9CJ+PN4FgCduUCVA9/el5yezSCZ6ko3+bU=";
      };
      x86_64-darwin = x86_64-linux;
      aarch64-linux = x86_64-linux // {
        hash = "sha256-5Lv2uA72BJEva5v2yncyPe5gKNCNOPNsoHffVt6KXQ0=";
      };
      aarch64-darwin = aarch64-linux;
    };
    jdk = jdk11_headless;
    # TODO: Package and add Intel Storage Acceleration Library
    tests = nixosTests.hadoop_3_3;
  };
  hadoop2 = common {
    pname = "hadoop";
    platformAttrs.x86_64-linux = {
      version = "2.10.2";
      hash = "sha256-xhA4zxqIRGNhIeBnJO9dLKf/gx/Bq+uIyyZwsIafEyo=";
      srcHash = "sha256-ucxCyXiJo8aL6aNMhZgKEbn8sGKOoMPVREbMGSfSdAI=";
    };
    jdk = jdk8_headless;
    tests = nixosTests.hadoop2;
  };
}