# Package expression for PySpark, built from the sdist published on PyPI.
#
# Arguments are expected to be supplied by the Python package set:
#   lib, buildPythonPackage, fetchPypi, pythonOlder - packaging helpers
#   py4j                                            - the only unconditional runtime dependency
#   numpy, pandas, pyarrow                          - used only for the optional extras below
{
  lib,
  buildPythonPackage,
  fetchPypi,
  numpy,
  pandas,
  py4j,
  pyarrow,
  pythonOlder,
}:

buildPythonPackage rec {
  pname = "pyspark";
  version = "3.5.1";
  format = "setuptools";

  # Refuse to build on interpreters older than Python 3.7.
  disabled = pythonOlder "3.7";

  src = fetchPypi {
    inherit pname version;
    hash = "sha256-3WVp5Uc2Xq3E+Ie/V/FT5NWCpoxLSQ3kddVbmYFmSRA=";
  };

  # Two edits to setup.py:
  #  1. Strip the 'pypandoc' requirement: pypandoc is broken with pandoc2, so
  #     we accept losing the docs. sed does nothing if the string is absent.
  #     NOTE(review): confirm this version's setup.py still mentions pypandoc;
  #     if it does not, this line can be dropped.
  #  2. Relax the exact py4j pin (py4j==X) to a lower bound (py4j>=X) so the
  #     py4j passed to this expression is accepted. `--replace-fail` is used
  #     instead of the deprecated `--replace`, so the build fails loudly if
  #     upstream changes the pin syntax and the substitution no longer applies.
  postPatch = ''
    sed -i "s/'pypandoc'//" setup.py

    substituteInPlace setup.py \
      --replace-fail py4j== 'py4j>='
  '';

  propagatedBuildInputs = [ py4j ];

  # Extras exposed under passthru; consumers opt in to the ones they need.
  passthru.optional-dependencies = {
    ml = [ numpy ];
    mllib = [ numpy ];
    sql = [
      numpy
      pandas
      pyarrow
    ];
  };

  # The test suite assumes a running Spark instance, so it is not run here.
  doCheck = false;

  # With the tests disabled, at least verify that the module can be imported.
  pythonImportsCheck = [ "pyspark" ];

  meta = with lib; {
    description = "Python bindings for Apache Spark";
    homepage = "https://github.com/apache/spark/tree/master/python";
    # NOTE(review): binaryBytecode is presumably listed because the PyPI
    # archive ships prebuilt JVM artifacts next to the Python sources - confirm.
    sourceProvenance = with sourceTypes; [
      fromSource
      binaryBytecode
    ];
    license = licenses.asl20;
    maintainers = with maintainers; [ shlevy ];
  };
}