1{
2 lib,
3 stdenv,
4 buildPythonPackage,
5 fetchFromGitHub,
6 rustPlatform,
7 pytestCheckHook,
8 libiconv,
9 numpy,
10 protobuf,
11 protoc,
12 pyarrow,
13 typing-extensions,
14 pythonOlder,
15}:
16
17let
18 arrow-testing = fetchFromGitHub {
19 name = "arrow-testing";
20 owner = "apache";
21 repo = "arrow-testing";
22 rev = "4d209492d514c2d3cb2d392681b9aa00e6d8da1c";
23 hash = "sha256-IkiCbuy0bWyClPZ4ZEdkEP7jFYLhM7RCuNLd6Lazd4o=";
24 };
25
26 parquet-testing = fetchFromGitHub {
27 name = "parquet-testing";
28 owner = "apache";
29 repo = "parquet-testing";
30 rev = "50af3d8ce206990d81014b1862e5ce7380dc3e08";
31 hash = "sha256-edyv/r5olkj09aHtm8LHZY0b3jUtLNUcufwI41qKYaY=";
32 };
33in
34
# Python package `datafusion`: a pyproject-based build whose Rust extension
# is compiled through the maturin build hook.
buildPythonPackage rec {
  pname = "datafusion";
  version = "40.1.0";
  pyproject = true;

  # Upstream sources, fetched at the git tag matching `version`.
  src = fetchFromGitHub {
    name = "datafusion-source";
    owner = "apache";
    repo = "arrow-datafusion-python";
    tag = version;
    hash = "sha256-5WOSlx4XW9zO6oTY16lWQElShLv0ubflVPfSSEGrFgg=";
  };

  # Vendored Cargo dependencies, derived from the same `src`.
  cargoDeps = rustPlatform.fetchCargoVendor {
    name = "datafusion-cargo-deps";
    inherit src;
    hash = "sha256-xUpchV4UFEX1HkCpClOwxnEfGLVlOIX4UmzYKiUth9U=";
  };

  # Hooks are referenced explicitly through `rustPlatform` so it is obvious
  # that `protoc` is a separate top-level argument, not a rustPlatform attr.
  nativeBuildInputs = [
    rustPlatform.cargoSetupHook
    rustPlatform.maturinBuildHook
    protoc
  ];

  # libiconv is only added when the host platform is Darwin.
  buildInputs = [
    protobuf
  ]
  ++ lib.optionals stdenv.hostPlatform.isDarwin [
    libiconv
  ];

  dependencies = [
    pyarrow
    typing-extensions
  ];

  nativeCheckInputs = [
    pytestCheckHook
    numpy
  ];

  pythonImportsCheck = [ "datafusion" ];

  # `--pyargs <pname>` makes pytest resolve the tests via the importable
  # package name rather than a filesystem path.
  pytestFlags = [
    "--pyargs"
    pname
  ];

  # Run the tests from $TMPDIR, with the pinned data checkouts exposed as
  # ./testing and ./parquet.
  # NOTE(review): presumably the test suite looks these directories up
  # relative to the working directory - confirm against upstream.
  preCheck = ''
    pushd "$TMPDIR"
    ln -s ${arrow-testing} ./testing
    ln -s ${parquet-testing} ./parquet
  '';

  # Return to the directory that preCheck left.
  postCheck = ''
    popd
  '';

  # Attributes are spelled out via `lib.` instead of `with lib;` so that no
  # name in this set is silently resolved from the whole of `lib`.
  meta = {
    description = "Extensible query execution framework";
    longDescription = ''
      DataFusion is an extensible query execution framework, written in Rust,
      that uses Apache Arrow as its in-memory format.
    '';
    homepage = "https://arrow.apache.org/datafusion/";
    changelog = "https://github.com/apache/arrow-datafusion-python/blob/${version}/CHANGELOG.md";
    license = [ lib.licenses.asl20 ];
    maintainers = [ lib.maintainers.cpcloud ];
  };
}