1{ lib
2, stdenv
3, fetchurl
4, buildPythonPackage
5, fetchPypi
6, fetchFromGitHub
7, rustPlatform
8, maturin
9, pytestCheckHook
10, libiconv
11, numpy
12, pandas
13, pyarrow
14, pytest
15}:
16let
# The Python bindings live in a subdirectory (python/) of the
# arrow-datafusion repository and are versioned independently of it, so
# there is no obvious mapping from the top-level repository version to the
# version of the bindings packaged here.
#
# The lock file is therefore fetched from a fixed upstream ref (6.0.0 in the
# URL below); whatever the ref is called, the sha256 has the final say over
# the content that ends up in the build.
cargoLock = fetchurl {
  sha256 = "sha256-xiv3drEU5jOGsEIh0U01ZQ1NBKobxO2ctp4mxy9iigw=";
  url = "https://raw.githubusercontent.com/apache/arrow-datafusion/6.0.0/python/Cargo.lock";
};
28
# Shell hook shared by the package build and the cargo vendoring step:
# copies the fetched Cargo.lock into the unpacked source root and makes the
# copy owner-writable (NOTE(review): presumably so ./Cargo.lock.patch can be
# applied to it afterwards -- confirm).
#
# The paths are quoted so a source root containing whitespace or glob
# characters cannot be word-split by the shell.
postUnpack = ''
  cp "${cargoLock}" "$sourceRoot/Cargo.lock"
  chmod u+w "$sourceRoot/Cargo.lock"
'';
33in
# Python bindings for DataFusion, built from the PyPI sdist with maturin.
buildPythonPackage rec {
  pname = "datafusion";
  version = "0.4.0";
  format = "pyproject";

  src = fetchPypi {
    inherit pname version;
    sha256 = "sha256-+YqogteKfNhtI2QbVXv/5CIWm3PcOH653dwONm5ZcL8=";
  };

  # TODO: drop `patches` and the postUnpack hook once
  # https://github.com/apache/arrow-datafusion/pull/1508 is merged.
  #
  # As of 6.0.0 the upstream lock file is out of date, so the lock file has to
  # be patched twice: once in this source tree and once in the vendored cargo
  # dependencies. Both derivations therefore share the same postUnpack hook
  # and the same patch list.
  inherit postUnpack;
  patches = [ ./Cargo.lock.patch ];

  cargoDeps = rustPlatform.fetchCargoTarball {
    inherit src pname version postUnpack patches;
    sha256 = "sha256-JGyDxpfBXzduJaMF1sbmRm7KJajHYdVSj+WbiSETiY0=";
  };

  nativeBuildInputs = [
    rustPlatform.cargoSetupHook
    rustPlatform.maturinBuildHook
  ];

  # libiconv is only added to the build inputs on Darwin.
  buildInputs = lib.optional stdenv.isDarwin libiconv;

  propagatedBuildInputs = [
    numpy
    pandas
    pyarrow
  ];

  checkInputs = [ pytest ];
  pythonImportsCheck = [ "datafusion" ];

  # NOTE(review): pytest is invoked by hand with --pyargs, presumably so the
  # tests run against the installed package rather than the source checkout
  # -- confirm before replacing this with pytestCheckHook.
  checkPhase = ''
    runHook preCheck
    pytest --pyargs "${pname}"
    runHook postCheck
  '';

  meta = {
    description = "Extensible query execution framework";
    longDescription = ''
      DataFusion is an extensible query execution framework, written in Rust,
      that uses Apache Arrow as its in-memory format.
    '';
    homepage = "https://arrow.apache.org/datafusion/";
    license = [ lib.licenses.asl20 ];
    maintainers = [ lib.maintainers.cpcloud ];
  };
}