# Nix expression packaging the `dask` Python library.
#
# The function takes its build helpers and Python dependencies as arguments
# and returns the result of `buildPythonPackage`. The source is fetched from
# the upstream GitHub repository at the tag matching `version`.
{ lib
, stdenv
, buildPythonPackage
, fetchFromGitHub

# build-system
, setuptools
, wheel

# dependencies
, click
, cloudpickle
, fsspec
, importlib-metadata
, packaging
, partd
, pyyaml
, toolz

# optional-dependencies
, numpy
, pyarrow
, lz4
, pandas
, distributed
, bokeh
, jinja2

# tests
, arrow-cpp
, hypothesis
, pytest-asyncio
, pytest-rerunfailures
, pytest-xdist
, pytestCheckHook
, pythonOlder
}:

buildPythonPackage rec {
  pname = "dask";
  version = "2023.10.1";
  pyproject = true;

  disabled = pythonOlder "3.9";

  src = fetchFromGitHub {
    owner = "dask";
    repo = "dask";
    rev = "refs/tags/${version}";
    hash = "sha256-asD5oLd7XcZ8ZFSrsSCAKgZ3Gsqs6T77nb1qesamgUI=";
  };

  nativeBuildInputs = [
    setuptools
    wheel
  ];

  propagatedBuildInputs = [
    click
    cloudpickle
    fsspec
    packaging
    partd
    pyyaml
    importlib-metadata
    toolz
  ];

  # Extras exposed to dependants. `lib.fix` ties the knot so that `complete`
  # can be expressed as the union of the sibling groups through `self`.
  passthru.optional-dependencies = lib.fix (self: {
    array = [
      numpy
    ];
    complete = [
      pyarrow
      lz4
    ]
    ++ self.array
    ++ self.dataframe
    ++ self.distributed
    ++ self.diagnostics;
    dataframe = [
      numpy
      pandas
    ];
    distributed = [
      distributed
    ];
    diagnostics = [
      bokeh
      jinja2
    ];
  });

  nativeCheckInputs = [
    pytestCheckHook
    pytest-rerunfailures
    pytest-xdist
    # from pandas[test]
    hypothesis
    pytest-asyncio
  ] ++ lib.optionals (!arrow-cpp.meta.broken) [ # support is sparse on aarch64
    pyarrow
  ];

  # The test suite is run by pytestCheckHook (see nativeCheckInputs), so the
  # setuptools-based check is switched off.
  dontUseSetuptoolsCheck = true;

  postPatch = ''
    # versioneer hack to set version of GitHub package
    echo "def get_versions(): return {'dirty': False, 'error': None, 'full-revisionid': None, 'version': '${version}'}" > dask/_version.py

    substituteInPlace setup.py \
      --replace "import versioneer" "" \
      --replace "version=versioneer.get_version()," "version='${version}'," \
      --replace "cmdclass=versioneer.get_cmdclass()," ""

    substituteInPlace pyproject.toml \
      --replace ', "versioneer[toml]==0.29"' "" \
      --replace " --durations=10" "" \
      --replace " --cov-config=pyproject.toml" "" \
      --replace "\"-v" "\" "
  '';

  pytestFlagsArray = [
    # Rerun failed tests up to three times
    "--reruns 3"
    # Don't run tests that require network access
    "-m 'not network'"
  ];

  disabledTests = lib.optionals stdenv.hostPlatform.isDarwin [
    # Test requires features of python3Packages.psutil that are
    # blocked in sandboxed-builds
    "test_auto_blocksize_csv"
    # AttributeError: 'str' object has no attribute 'decode'
    "test_read_dir_nometa"
  ] ++ lib.optionals (stdenv.hostPlatform.isDarwin && stdenv.hostPlatform.isAarch64) [
    # concurrent.futures.process.BrokenProcessPool: A process in the process pool terminated abruptly...
    "test_foldby_tree_reduction"
    "test_to_bag"
  ] ++ [
    # https://github.com/dask/dask/issues/10347#issuecomment-1589683941
    "test_concat_categorical"
    # AttributeError: 'ArrowStringArray' object has no attribute 'tobytes'. Did you mean: 'nbytes'?
    "test_dot"
    "test_dot_nan"
    "test_merge_column_with_nulls"
    # FileNotFoundError: [Errno 2] No such file or directory: '/build/tmp301jryv_/createme/0.part'
    "test_to_csv_nodir"
    "test_to_json_results"
    # FutureWarning: Those tests should be working fine when pandas will have been upgraded to 2.1.1
    "test_apply"
    "test_apply_infer_columns"
  ];

  # NOTE(review): presumably required so tests can use local sockets inside
  # the Darwin sandbox -- confirm before removing.
  __darwinAllowLocalNetworking = true;

  pythonImportsCheck = [
    "dask"
    "dask.array"
    "dask.bag"
    "dask.bytes"
    "dask.dataframe"
    "dask.dataframe.io"
    "dask.dataframe.tseries"
    "dask.diagnostics"
  ];

  meta = {
    description = "Minimal task scheduling abstraction";
    homepage = "https://dask.org/";
    changelog = "https://docs.dask.org/en/latest/changelog.html";
    license = lib.licenses.bsd3;
    maintainers = with lib.maintainers; [ fridh ];
  };
}