{
  lib,
  stdenv,
  buildPythonPackage,
  fetchFromGitHub,
  rustPlatform,
  pytestCheckHook,
  libiconv,
  numpy,
  protobuf,
  protoc,
  pyarrow,
  typing-extensions,
  # Not referenced anywhere below; kept in the argument set so that existing
  # `.override { pythonOlder = ...; }` callers continue to evaluate.
  pythonOlder,
}:

let
  # Pinned checkout of apache/arrow-testing; preCheck symlinks it as ./testing.
  arrow-testing = fetchFromGitHub {
    name = "arrow-testing";
    owner = "apache";
    repo = "arrow-testing";
    rev = "4d209492d514c2d3cb2d392681b9aa00e6d8da1c";
    hash = "sha256-IkiCbuy0bWyClPZ4ZEdkEP7jFYLhM7RCuNLd6Lazd4o=";
  };

  # Pinned checkout of apache/parquet-testing; preCheck symlinks it as ./parquet.
  parquet-testing = fetchFromGitHub {
    name = "parquet-testing";
    owner = "apache";
    repo = "parquet-testing";
    rev = "50af3d8ce206990d81014b1862e5ce7380dc3e08";
    hash = "sha256-edyv/r5olkj09aHtm8LHZY0b3jUtLNUcufwI41qKYaY=";
  };
in

# Builds the `datafusion` Python package from the apache/arrow-datafusion-python
# sources using the Rust (cargo + maturin) build hooks.
buildPythonPackage rec {
  pname = "datafusion";
  version = "40.1.0";
  pyproject = true;

  src = fetchFromGitHub {
    name = "datafusion-source";
    owner = "apache";
    repo = "arrow-datafusion-python";
    tag = version;
    hash = "sha256-5WOSlx4XW9zO6oTY16lWQElShLv0ubflVPfSSEGrFgg=";
  };

  # Cargo dependencies are vendored from the same source tree as `src`.
  cargoDeps = rustPlatform.fetchCargoVendor {
    name = "datafusion-cargo-deps";
    inherit src;
    hash = "sha256-xUpchV4UFEX1HkCpClOwxnEfGLVlOIX4UmzYKiUth9U=";
  };

  # The hooks are qualified explicitly rather than via `with rustPlatform;`:
  # `protoc` is this file's own argument, not an attribute of rustPlatform, and
  # a surrounding `with` made that easy to misread.
  nativeBuildInputs = [
    rustPlatform.cargoSetupHook
    rustPlatform.maturinBuildHook
    protoc
  ];

  # libiconv is only added when the host platform is Darwin.
  buildInputs = [
    protobuf
  ]
  ++ lib.optionals stdenv.hostPlatform.isDarwin [
    libiconv
  ];

  dependencies = [
    pyarrow
    typing-extensions
  ];

  nativeCheckInputs = [
    pytestCheckHook
    numpy
  ];

  pythonImportsCheck = [ "datafusion" ];

  # `--pyargs datafusion` makes pytest treat the argument as an importable
  # package name instead of a filesystem path.
  pytestFlagsArray = [
    "--pyargs"
    pname
  ];

  # Tests run from $TMPDIR with the two fixture checkouts linked in under the
  # relative names ./testing and ./parquet; postCheck undoes the pushd.
  preCheck = ''
    pushd "$TMPDIR"
    ln -s ${arrow-testing} ./testing
    ln -s ${parquet-testing} ./parquet
  '';

  postCheck = ''
    popd
  '';

  # Attribute paths are spelled out (lib.licenses, lib.maintainers) instead of
  # nesting `with lib;` inside this `rec` set, so `version` below can only ever
  # mean this package's version.
  meta = {
    description = "Extensible query execution framework";
    longDescription = ''
      DataFusion is an extensible query execution framework, written in Rust,
      that uses Apache Arrow as its in-memory format.
    '';
    homepage = "https://arrow.apache.org/datafusion/";
    changelog = "https://github.com/apache/arrow-datafusion-python/blob/${version}/CHANGELOG.md";
    license = [ lib.licenses.asl20 ];
    maintainers = [ lib.maintainers.cpcloud ];
  };
}