# nixpkgs mirror (for testing)
# github.com/NixOS/nixpkgs
# Language: Nix
{
  lib,
  stdenv,
  fetchzip,
  makeWrapper,
  python3,
  hadoop,
  RSupport ? true,
  R,
  nixosTests,
}:

let
  # Builder shared by every packaged Spark version below.  Takes the
  # per-version attributes (pname/version/hash) plus optional overrides and
  # produces a derivation for the upstream "bin-without-hadoop" release,
  # with the executables wrapped to use the hadoop/JDK/Python (and, when
  # RSupport is enabled, R) provided by nixpkgs.
  spark =
    {
      pname,
      version,
      hash,
      # Extra attributes merged into `meta` (e.g. knownVulnerabilities).
      extraMeta ? { },
      # Python interpreter used for PySpark; replaced by
      # passthru.withPythonPackages.
      pysparkPython ? python3,
    }:
    stdenv.mkDerivation (finalAttrs: {
      inherit
        pname
        version
        hash
        hadoop
        R
        pysparkPython
        ;
      # Reuse the JDK that the selected hadoop package carries.
      inherit (finalAttrs.hadoop) jdk;

      src = fetchzip {
        url = "mirror://apache/spark/${finalAttrs.pname}-${finalAttrs.version}/${finalAttrs.pname}-${finalAttrs.version}-bin-without-hadoop.tgz";
        inherit (finalAttrs) hash;
      };

      nativeBuildInputs = [ makeWrapper ];
      buildInputs = [
        finalAttrs.jdk
        finalAttrs.pysparkPython
      ]
      ++ lib.optionals RSupport [ finalAttrs.R ];

      installPhase = ''
        mkdir -p "$out/opt"
        mv * $out/
        for n in $(find $out/bin -type f -executable ! -name "find-spark-home"); do
          wrapProgram "$n" --set JAVA_HOME "${finalAttrs.jdk}" \
            --run "[ -z $SPARK_DIST_CLASSPATH ] && export SPARK_DIST_CLASSPATH=$(${finalAttrs.hadoop}/bin/hadoop classpath)" \
            ${lib.optionalString RSupport ''--set SPARKR_R_SHELL "${finalAttrs.R}/bin/R"''} \
            --prefix PATH : "${
              lib.makeBinPath ([ finalAttrs.pysparkPython ] ++ (lib.optionals RSupport [ finalAttrs.R ]))
            }"
        done
        ln -s ${finalAttrs.hadoop} "$out/opt/hadoop"
        ${lib.optionalString RSupport ''ln -s ${finalAttrs.R} "$out/opt/R"''}
      '';

      passthru = {
        # NixOS VM test, parameterized with this exact package.
        tests = nixosTests.spark.default.passthru.override {
          sparkPackage = finalAttrs.finalPackage;
        };
        # Return a variant of this package whose PYSPARK_PYTHON includes the
        # python packages selected by `f` (same callback shape as
        # python3.withPackages).
        withPythonPackages =
          f:
          finalAttrs.finalPackage.overrideAttrs (old: {
            pysparkPython = old.pysparkPython.withPackages f;
          });
      };

      meta = {
        description = "Apache Spark is a fast and general engine for large-scale data processing";
        homepage = "https://spark.apache.org/";
        sourceProvenance = with lib.sourceTypes; [ binaryBytecode ];
        license = lib.licenses.asl20;
        platforms = lib.platforms.all;
        maintainers = with lib.maintainers; [
          thoughtpolice
          offline
          kamilchm
          illustris
        ];
      }
      // extraMeta;
    });
in
{
  # A note on EOL and removing old versions:
  # According to spark's versioning policy (https://spark.apache.org/versioning-policy.html),
  # minor releases are generally maintained with bugfixes for 18 months. But it doesn't
  # make sense to remove a given minor version the moment it crosses this threshold.
  # For example, spark 3.3.0 was released on 2022-06-09. It would have to be removed on 2023-12-09 if
  # we strictly adhere to the EOL timeline, despite 3.3.4 being released one day before (2023-12-08).
  # A better policy is to keep these versions around, and clean up EOL versions just before
  # a new NixOS release.
  spark_3_5 = spark {
    pname = "spark";
    version = "3.5.5";
    hash = "sha256-vzcWgIfHPhN3nyrxdk3f0p4fW3MpQ+FuEPnWPw0xNPg=";
  };
  # No attribute here refers to a sibling, so `rec` was superfluous; dropped
  # for consistency with spark_3_5 above.
  spark_3_4 = spark {
    pname = "spark";
    version = "3.4.4";
    hash = "sha256-GItHmthLhG7y0XSF3QINCyE7wYFb0+lPZmYLUuMa4Ww=";
  };
}