# Package expression for dask: fetches the upstream GitHub tag that matches
# `version`, builds it with setuptools, and runs the test suite through
# pytestCheckHook.
{
  lib,
  stdenv,
  arrow-cpp,
  bokeh,
  buildPythonPackage,
  click,
  cloudpickle,
  distributed,
  fastparquet,
  fetchFromGitHub,
  fetchpatch,
  fsspec,
  importlib-metadata,
  jinja2,
  numpy,
  packaging,
  pandas,
  partd,
  pyarrow,
  pytest-rerunfailures,
  pytest-xdist,
  pytestCheckHook,
  pythonOlder,
  pyyaml,
  scipy,
  setuptools,
  toolz,
  versioneer,
  zarr,
}:

let
  # Parquet backends used by the tests. Support is sparse on aarch64, so they
  # are only included when arrow-cpp is not marked broken.
  parquetCheckInputs = lib.optionals (!arrow-cpp.meta.broken) [
    fastparquet
    pyarrow
  ];
in
buildPythonPackage rec {
  pname = "dask";
  version = "2023.4.1";
  format = "setuptools";

  # Package is disabled whenever `pythonOlder "3.8"` holds.
  disabled = pythonOlder "3.8";

  src = fetchFromGitHub {
    owner = "dask";
    repo = "dask";
    rev = "refs/tags/${version}";
    hash = "sha256-PkEFXF6OFZU+EMFBUopv84WniQghr5Q6757Qx6D5MyE=";
  };

  # Three source tweaks applied before the build:
  #   1. write a static dask/_version.py reporting `version`;
  #   2. drop the versioneer calls from setup.py in favour of that literal;
  #   3. strip the duration, coverage-config and verbose flags from the
  #      pytest options in pyproject.toml.
  postPatch = ''
    # versioneer hack to set version of GitHub package
    echo "def get_versions(): return {'dirty': False, 'error': None, 'full-revisionid': None, 'version': '${version}'}" > dask/_version.py

    substituteInPlace setup.py \
      --replace "version=versioneer.get_version()," "version='${version}'," \
      --replace "cmdclass=versioneer.get_cmdclass()," ""

    substituteInPlace pyproject.toml \
      --replace " --durations=10" "" \
      --replace " --cov-config=pyproject.toml" "" \
      --replace " -v" ""
  '';

  nativeBuildInputs = [
    setuptools
    versioneer
  ];

  propagatedBuildInputs = [
    click
    cloudpickle
    fsspec
    packaging
    partd
    pyyaml
    importlib-metadata
    toolz
  ];

  # Extras exposed to consumers, keyed by extra name.
  passthru = {
    optional-dependencies = {
      array = [ numpy ];
      complete = [ distributed ];
      dataframe = [
        numpy
        pandas
      ];
      distributed = [ distributed ];
      diagnostics = [
        bokeh
        jinja2
      ];
    };
  };

  # The suite is driven by pytestCheckHook (listed below), so the setuptools
  # check is switched off.
  dontUseSetuptoolsCheck = true;

  nativeCheckInputs = [
    pytestCheckHook
    pytest-rerunfailures
    pytest-xdist
    scipy
    zarr
  ]
  ++ parquetCheckInputs;

  pytestFlagsArray = [
    # Give a failing test up to three more attempts
    "--reruns 3"
    # Skip everything marked as needing network access
    "-m 'not network'"
  ];

  disabledTests =
    # Darwin-only exclusions come first, followed by the ones that apply
    # on every platform.
    lib.optionals stdenv.isDarwin [
      # Needs python3Packages.psutil features that sandboxed builds block
      "test_auto_blocksize_csv"
      # AttributeError: 'str' object has no attribute 'decode'
      "test_read_dir_nometa"
    ]
    ++ [
      "test_chunksize_files"
      # TypeError: 'ArrowStringArray' with dtype string does not support reduction 'min'
      "test_set_index_string"
      # numpy 1.24
      # RuntimeWarning: invalid value encountered in cast
      "test_setitem_extended_API_2d_mask"
    ];

  __darwinAllowLocalNetworking = true;

  # Modules that must import cleanly from the built package.
  pythonImportsCheck = [
    "dask"
    "dask.array"
    "dask.bag"
    "dask.bytes"
    "dask.dataframe"
    "dask.dataframe.io"
    "dask.dataframe.tseries"
    "dask.diagnostics"
  ];

  meta = {
    description = "Minimal task scheduling abstraction";
    homepage = "https://dask.org/";
    changelog = "https://docs.dask.org/en/latest/changelog.html";
    license = lib.licenses.bsd3;
    maintainers = [ lib.maintainers.fridh ];
  };
}