# Python bindings for DataFusion, built from a Rust crate through maturin.
#
# The expression is a function of its dependencies, written to be called with
# the arguments named below.
{
  lib,
  stdenv,
  buildPythonPackage,
  fetchFromGitHub,
  rustPlatform,
  pytestCheckHook,
  libiconv,
  numpy,
  protobuf,
  protoc,
  pyarrow,
  typing-extensions,
  # NOTE(review): not referenced anywhere in this expression; kept in the
  # argument set so existing callers/overrides that pass it keep evaluating.
  pythonOlder,
}:

let
  # Data fixtures pinned to fixed revisions. They are only used in `preCheck`,
  # where they are symlinked into the directory the tests run from.
  arrow-testing = fetchFromGitHub {
    name = "arrow-testing";
    owner = "apache";
    repo = "arrow-testing";
    rev = "4d209492d514c2d3cb2d392681b9aa00e6d8da1c";
    hash = "sha256-IkiCbuy0bWyClPZ4ZEdkEP7jFYLhM7RCuNLd6Lazd4o=";
  };

  parquet-testing = fetchFromGitHub {
    name = "parquet-testing";
    owner = "apache";
    repo = "parquet-testing";
    rev = "50af3d8ce206990d81014b1862e5ce7380dc3e08";
    hash = "sha256-edyv/r5olkj09aHtm8LHZY0b3jUtLNUcufwI41qKYaY=";
  };
in

buildPythonPackage rec {
  pname = "datafusion";
  version = "40.1.0";
  pyproject = true;

  src = fetchFromGitHub {
    name = "datafusion-source";
    owner = "apache";
    repo = "arrow-datafusion-python";
    tag = version;
    hash = "sha256-5WOSlx4XW9zO6oTY16lWQElShLv0ubflVPfSSEGrFgg=";
  };

  # Vendored Cargo dependencies, derived from the same `src` as the package.
  cargoDeps = rustPlatform.fetchCargoVendor {
    name = "datafusion-cargo-deps";
    inherit src;
    hash = "sha256-xUpchV4UFEX1HkCpClOwxnEfGLVlOIX4UmzYKiUth9U=";
  };

  # The two hooks come from `rustPlatform`; `protoc` is the function argument
  # above. Spelled out explicitly instead of `with rustPlatform;` so the origin
  # of each name is visible at a glance.
  nativeBuildInputs = [
    rustPlatform.cargoSetupHook
    rustPlatform.maturinBuildHook
    protoc
  ];

  # libiconv is only added when the host platform is Darwin.
  buildInputs =
    [ protobuf ]
    ++ lib.optionals stdenv.hostPlatform.isDarwin [
      libiconv
    ];

  dependencies = [
    pyarrow
    typing-extensions
  ];

  nativeCheckInputs = [
    pytestCheckHook
    numpy
  ];

  pythonImportsCheck = [ "datafusion" ];

  # `--pyargs` makes pytest treat the argument as a package name rather than a
  # filesystem path; the package name passed here is `pname`.
  pytestFlagsArray = [
    "--pyargs"
    pname
  ];

  # Run the checks from $TMPDIR with the fixtures linked in as ./testing and
  # ./parquet.
  # NOTE(review): presumably the test suite looks the fixtures up relative to
  # the working directory under exactly these names -- confirm against upstream.
  preCheck = ''
    pushd "$TMPDIR"
    ln -s ${arrow-testing} ./testing
    ln -s ${parquet-testing} ./parquet
  '';

  # Undo the `pushd` from preCheck so later phases start from the original
  # working directory.
  postCheck = ''
    popd
  '';

  meta = {
    description = "Extensible query execution framework";
    longDescription = ''
      DataFusion is an extensible query execution framework, written in Rust,
      that uses Apache Arrow as its in-memory format.
    '';
    homepage = "https://arrow.apache.org/datafusion/";
    changelog = "https://github.com/apache/arrow-datafusion-python/blob/${version}/CHANGELOG.md";
    license = [ lib.licenses.asl20 ];
    maintainers = [ lib.maintainers.cpcloud ];
  };
}