# Python package `datafusion`, built from the source published on PyPI using
# the maturin build hook.
#
# NOTE: `fetchFromGitHub`, `maturin` and `pytestCheckHook` are accepted but not
# referenced anywhere in this expression; they remain in the argument list so
# that existing callers and overrides keep working unchanged.
{ lib
, stdenv
, fetchurl
, buildPythonPackage
, fetchPypi
, fetchFromGitHub
, rustPlatform
, maturin
, pytestCheckHook
, libiconv
, numpy
, pandas
, pyarrow
, pytest
}:

let
  pname = "datafusion";
  version = "0.4.0";

  src = fetchPypi {
    inherit pname version;
    sha256 = "sha256-+YqogteKfNhtI2QbVXv/5CIWm3PcOH653dwONm5ZcL8=";
  };

  # The Python bindings live inside the arrow-datafusion repository
  # (arrow-datafusion/python) and are versioned independently of it, so there
  # is no obvious mapping from the package version above to an upstream
  # release. The lock file is therefore fetched from an explicit upstream ref;
  # the sha256 is what ultimately pins the content used for the build.
  upstreamCargoLock = fetchurl {
    url = "https://raw.githubusercontent.com/apache/arrow-datafusion/6.0.0/python/Cargo.lock";
    sha256 = "sha256-xiv3drEU5jOGsEIh0U01ZQ1NBKobxO2ctp4mxy9iigw=";
  };

  # Drops the fetched lock file into the unpacked source tree and makes it
  # writable so that the patch below can be applied on top of it.
  installCargoLock = ''
    cp "${upstreamCargoLock}" $sourceRoot/Cargo.lock
    chmod u+w $sourceRoot/Cargo.lock
  '';

  # TODO: remove the patch hacking and postUnpack hooks after
  # https://github.com/apache/arrow-datafusion/pull/1508 is merged
  #
  # The upstream lock file is not up to date as of 6.0.0, so the same patch is
  # applied both to the package source and to the vendored cargo dependencies.
  lockfilePatches = [ ./Cargo.lock.patch ];
in
buildPythonPackage {
  inherit pname version src;
  format = "pyproject";

  postUnpack = installCargoLock;
  patches = lockfilePatches;

  # Vendored Rust dependencies; receives the same lock-file injection and
  # patch as the main source so both agree on Cargo.lock.
  cargoDeps = rustPlatform.fetchCargoTarball {
    inherit pname version src;
    postUnpack = installCargoLock;
    patches = lockfilePatches;
    sha256 = "sha256-JGyDxpfBXzduJaMF1sbmRm7KJajHYdVSj+WbiSETiY0=";
  };

  nativeBuildInputs = [
    rustPlatform.cargoSetupHook
    rustPlatform.maturinBuildHook
  ];

  # libiconv is only added when building on Darwin.
  buildInputs = lib.optionals stdenv.isDarwin [ libiconv ];

  propagatedBuildInputs = [
    numpy
    pandas
    pyarrow
  ];

  checkInputs = [ pytest ];
  pythonImportsCheck = [ "datafusion" ];

  # Runs pytest against the installed package (selected by module name via
  # --pyargs), wrapped in the usual pre/post check hooks.
  checkPhase = ''
    runHook preCheck
    pytest --pyargs "${pname}"
    runHook postCheck
  '';

  meta = {
    description = "Extensible query execution framework";
    longDescription = ''
      DataFusion is an extensible query execution framework, written in Rust,
      that uses Apache Arrow as its in-memory format.
    '';
    homepage = "https://arrow.apache.org/datafusion/";
    license = [ lib.licenses.asl20 ];
    maintainers = [ lib.maintainers.cpcloud ];
  };
}