1{
2 lib,
3 stdenv,
4 buildPythonPackage,
5 fetchFromGitHub,
6 rustPlatform,
7 pytestCheckHook,
8 libiconv,
9 numpy,
10 protobuf,
11 pyarrow,
12 Security,
13 SystemConfiguration,
14}:
15
16let
# Snapshot of the apache/arrow-testing repository, pinned to a single commit.
# preCheck below symlinks this store path into the test working directory
# as ./testing.
arrow-testing = fetchFromGitHub {
  owner = "apache";
  repo = "arrow-testing";
  name = "arrow-testing";
  # Commit pin and the matching content hash; the two must be updated together.
  rev = "5bab2f264a23f5af68f69ea93d24ef1e8e77fc88";
  hash = "sha256-Pxx8ohUpXb5u1995IvXmxQMqWiDJ+7LAll/AjQP7ph8=";
};
24
# Snapshot of the apache/parquet-testing repository, pinned to a single commit.
# preCheck below symlinks this store path into the test working directory
# as ./parquet.
parquet-testing = fetchFromGitHub {
  owner = "apache";
  repo = "parquet-testing";
  name = "parquet-testing";
  # Commit pin and the matching content hash; the two must be updated together.
  rev = "e13af117de7c4f0a4d9908ae3827b3ab119868f3";
  hash = "sha256-rVI9zyk9IRDlKv4u8BeMb0HRdWLfCpqOlYCeUdA7BB8=";
};
32in
33
# Builds the `datafusion` Python package from the
# apache/arrow-datafusion-python sources at the tag matching `version`.
#
# `rec` is required: `version` is interpolated into `src.rev` and
# `meta.changelog`, and `pname` is reused in `cargoDeps` and `pytestFlagsArray`.
buildPythonPackage rec {
  pname = "datafusion";
  version = "38.0.1";
  pyproject = true;

  src = fetchFromGitHub {
    name = "datafusion-source";
    owner = "apache";
    repo = "arrow-datafusion-python";
    rev = "refs/tags/${version}";
    hash = "sha256-rBS6i2HqpdhnhZZfO0ywL/e4a+rnUZkHzezKd8PuG80=";
  };

  # Rust crate dependencies for the same `src`; `hash` pins the fetched
  # tarball and has to be refreshed whenever `version` (and thus `src`) changes.
  cargoDeps = rustPlatform.fetchCargoTarball {
    name = "datafusion-cargo-deps";
    inherit src pname version;
    hash = "sha256-M2ZNAFWdsnN9C4+YbqFxZVH9fHR10Bimf1Xzrd9oy9E=";
  };

  # Explicit attribute access instead of `with rustPlatform;` so the origin of
  # each hook is visible at the use site.
  nativeBuildInputs = [
    rustPlatform.cargoSetupHook
    rustPlatform.maturinBuildHook
  ];

  # `stdenv.hostPlatform.isDarwin` is the canonical spelling of the platform
  # check; the `stdenv.isDarwin` shorthand is a legacy alias for the same value.
  buildInputs =
    [ protobuf ]
    ++ lib.optionals stdenv.hostPlatform.isDarwin [
      libiconv
      Security
      SystemConfiguration
    ];

  propagatedBuildInputs = [ pyarrow ];

  nativeCheckInputs = [
    pytestCheckHook
    numpy
  ];
  pythonImportsCheck = [ "datafusion" ];
  # `--pyargs <pname>` makes pytest resolve the argument as an importable
  # package name rather than a filesystem path.
  pytestFlagsArray = [
    "--pyargs"
    pname
  ];

  # Run the checks from $TMPDIR, with the pinned test-data checkouts linked in
  # as ./testing and ./parquet.
  # NOTE(review): presumably the test suite looks these directories up
  # relative to the working directory -- confirm against upstream.
  preCheck = ''
    pushd $TMPDIR
    ln -s ${arrow-testing} ./testing
    ln -s ${parquet-testing} ./parquet
  '';

  # Undo the `pushd` from preCheck so later phases run in the original directory.
  postCheck = ''
    popd
  '';

  # Attributes are referenced through `lib.` explicitly rather than via
  # `with lib;`, which keeps every identifier in `meta` statically resolvable.
  meta = {
    description = "Extensible query execution framework";
    longDescription = ''
      DataFusion is an extensible query execution framework, written in Rust,
      that uses Apache Arrow as its in-memory format.
    '';
    homepage = "https://arrow.apache.org/datafusion/";
    changelog = "https://github.com/apache/arrow-datafusion-python/blob/${version}/CHANGELOG.md";
    license = [ lib.licenses.asl20 ];
    maintainers = [ lib.maintainers.cpcloud ];
  };
}