# pyspark — Python bindings for Apache Spark (as of nixpkgs 24.11-pre)
{
  lib,
  buildPythonPackage,
  fetchPypi,
  numpy,
  pandas,
  py4j,
  pyarrow,
  pythonOlder,
}:

buildPythonPackage rec {
  pname = "pyspark";
  version = "3.5.1";
  format = "setuptools";

  disabled = pythonOlder "3.7";

  src = fetchPypi {
    inherit pname version;
    hash = "sha256-3WVp5Uc2Xq3E+Ie/V/FT5NWCpoxLSQ3kddVbmYFmSRA=";
  };

  # pypandoc is broken with pandoc2, so we just lose docs.
  postPatch = ''
    sed -i "s/'pypandoc'//" setup.py

    # Upstream pins an exact py4j version; relax it so the py4j packaged in
    # nixpkgs is accepted.  --replace-fail (rather than the deprecated
    # --replace) makes the build abort if the pin ever disappears, instead of
    # silently patching nothing.
    substituteInPlace setup.py \
      --replace-fail py4j== 'py4j>='
  '';

  propagatedBuildInputs = [ py4j ];

  # Optional feature sets mirroring upstream's extras_require.
  passthru.optional-dependencies = {
    ml = [ numpy ];
    mllib = [ numpy ];
    sql = [
      numpy
      pandas
      pyarrow
    ];
  };

  # Tests assume a running Spark instance, which is unavailable in the sandbox.
  doCheck = false;

  pythonImportsCheck = [ "pyspark" ];

  meta = with lib; {
    description = "Python bindings for Apache Spark";
    homepage = "https://github.com/apache/spark/tree/master/python";
    # The PyPI sdist bundles prebuilt Spark JARs alongside the Python source.
    sourceProvenance = with sourceTypes; [
      fromSource
      binaryBytecode
    ];
    license = licenses.asl20;
    maintainers = with maintainers; [ shlevy ];
  };
}