{ lib
, buildPythonPackage
, fetchPypi
, numpy
, pandas
, py4j
, pyarrow
, pythonOlder
}:

buildPythonPackage rec {
  pname = "pyspark";
  version = "3.3.1";
  format = "setuptools";

  disabled = pythonOlder "3.7";

  # NOTE(review): the PyPI sdist ships prebuilt content (see binaryBytecode
  # in meta.sourceProvenance below), not a pure-source tree.
  src = fetchPypi {
    inherit pname version;
    hash = "sha256-6Z+n3pK+QGiEv9gxwyuTBqOpneRM/Dmi7vtu0HRF1fo=";
  };

  # pypandoc is broken with pandoc2, so we just lose docs.
  #
  # Additionally loosen upstream's exact "py4j==" pin to a ">=" lower bound
  # so the py4j version packaged in nixpkgs is accepted.
  postPatch = ''
    sed -i "s/'pypandoc'//" setup.py

    substituteInPlace setup.py \
      --replace py4j== 'py4j>='
  '';

  propagatedBuildInputs = [
    py4j
  ];

  # Optional feature sets; presumably these mirror the extras_require
  # groups declared in upstream's setup.py — verify when bumping.
  passthru.optional-dependencies = {
    ml = [
      numpy
    ];
    mllib = [
      numpy
    ];
    sql = [
      numpy
      pandas
      pyarrow
    ];
  };

  # Tests assume running spark instance
  doCheck = false;

  pythonImportsCheck = [
    "pyspark"
  ];

  meta = with lib; {
    description = "Python bindings for Apache Spark";
    homepage = "https://github.com/apache/spark/tree/master/python";
    changelog = "https://spark.apache.org/releases/spark-release-${version}.html";
    sourceProvenance = with sourceTypes; [
      fromSource
      binaryBytecode
    ];
    license = licenses.asl20;
    maintainers = with maintainers; [ shlevy ];
  };
}