1{
2 lib,
3 stdenv,
4 buildPythonPackage,
5 fetchFromGitHub,
6
7 # build-system
8 setuptools,
9 wheel,
10
11 # dependencies
12 click,
13 cloudpickle,
14 fsspec,
15 importlib-metadata,
16 packaging,
17 partd,
18 pyyaml,
19 toolz,
20
21 # optional-dependencies
22 numpy,
23 pyarrow,
24 lz4,
25 pandas,
26 distributed,
27 bokeh,
28 jinja2,
29
30 # tests
31 arrow-cpp,
32 dask-expr,
33 hypothesis,
34 pytest-asyncio,
35 pytest-rerunfailures,
36 pytest-xdist,
37 pytestCheckHook,
38 pythonOlder,
39}:
40
41let
42 self = buildPythonPackage rec {
# Package identity. `version` is reused below to pick the upstream git tag.
pname = "dask";
version = "2024.5.1";

# Upstream sources, fetched from the GitHub tag that matches `version`.
src = fetchFromGitHub {
  repo = "dask";
  owner = "dask";
  rev = "refs/tags/${version}";
  hash = "sha256-FzvzmQa9kJAZw67HY+d+3uC6Bd246vp5QsyXepGnKH8=";
};

# Build from the project's pyproject.toml.
pyproject = true;

# Not available for Python interpreters older than 3.9.
disabled = pythonOlder "3.9";
55
# Packages needed to build the package itself.
build-system = [ setuptools wheel ];

# Runtime requirements. The order is kept as-is so the resulting derivation
# is unchanged.
dependencies = [
  click
  cloudpickle
  fsspec
  packaging
  partd
  pyyaml
  importlib-metadata
  toolz
];
71
# Optional feature sets ("extras"). `lib.fix` lets `complete` refer to its
# sibling extras through the `extras` argument; a plain `rec` set would not
# work here because `distributed = [ distributed ]` must resolve to the
# package passed in as a function argument, not to the attribute itself.
passthru.optional-dependencies = lib.fix (extras: {
  array = [ numpy ];

  dataframe = [
    # dask-expr is intentionally left out: circular dependency with dask-expr
    numpy
    pandas
  ];

  diagnostics = [
    bokeh
    jinja2
  ];

  distributed = [ distributed ];

  # Everything above, plus pyarrow and lz4.
  complete =
    [
      pyarrow
      lz4
    ]
    ++ extras.array
    ++ extras.dataframe
    ++ extras.distributed
    ++ extras.diagnostics;
});
89
# Inputs that are only needed while running the test suite.
nativeCheckInputs =
  let
    testTooling = [
      dask-expr
      pytestCheckHook
      pytest-rerunfailures
      pytest-xdist
      # from pandas[test]
      hypothesis
      pytest-asyncio
    ];

    # pyarrow is only added where arrow-cpp is not marked broken
    # (support is sparse on aarch64).
    arrowSupport = lib.optional (!arrow-cpp.meta.broken) pyarrow;
  in
  testTooling
  ++ passthru.optional-dependencies.array
  ++ passthru.optional-dependencies.dataframe
  ++ arrowSupport;

# Tests are run by pytestCheckHook; skip the setuptools-based check.
dontUseSetuptoolsCheck = true;
108
# Pin the package version without versioneer (the GitHub source has no
# generated version information) and strip pytest options that are not wanted
# in the sandboxed build.
#
# `--replace-fail` is used instead of the deprecated `--replace`, so a pattern
# that no longer matches after an upstream change aborts the build instead of
# being silently ignored.
postPatch = ''
  # versioneer hack to set version of GitHub package
  echo "def get_versions(): return {'dirty': False, 'error': None, 'full-revisionid': None, 'version': '${version}'}" > dask/_version.py

  substituteInPlace setup.py \
    --replace-fail "import versioneer" "" \
    --replace-fail "version=versioneer.get_version()," "version='${version}'," \
    --replace-fail "cmdclass=versioneer.get_cmdclass()," ""

  substituteInPlace pyproject.toml \
    --replace-fail ', "versioneer[toml]==0.29"' "" \
    --replace-fail " --durations=10" "" \
    --replace-fail " --cov-config=pyproject.toml" "" \
    --replace-fail "\"-v" "\" "
'';
124
# Extra command-line flags handed to pytest.
pytestFlagsArray = [
  "--reruns 3" # retry a failing test up to three times
  "-m 'not network'" # deselect tests marked as needing network access
];
131
# Tests skipped by name, grouped by the platforms on which they are skipped.
disabledTests =
  let
    inherit (stdenv) hostPlatform;

    darwinOnly = [
      # Test requires features of python3Packages.psutil that are
      # blocked in sandboxed-builds
      "test_auto_blocksize_csv"
      # AttributeError: 'str' object has no attribute 'decode'
      "test_read_dir_nometa"
    ];

    aarch64DarwinOnly = [
      # concurrent.futures.process.BrokenProcessPool: A process in the process pool terminated abruptly...
      "test_foldby_tree_reduction"
      "test_to_bag"
    ];

    everywhere = [
      # https://github.com/dask/dask/issues/10347#issuecomment-1589683941
      "test_concat_categorical"
      # AttributeError: 'ArrowStringArray' object has no attribute 'tobytes'. Did you mean: 'nbytes'?
      "test_dot"
      "test_dot_nan"
      "test_merge_column_with_nulls"
      # FileNotFoundError: [Errno 2] No such file or directory: '/build/tmp301jryv_/createme/0.part'
      "test_to_csv_nodir"
      "test_to_json_results"
      # FutureWarning: these tests should work once pandas has been upgraded to 2.1.1
      "test_apply"
      "test_apply_infer_columns"
    ];
  in
  lib.optionals hostPlatform.isDarwin darwinOnly
  ++ lib.optionals (hostPlatform.isDarwin && hostPlatform.isAarch64) aarch64DarwinOnly
  ++ everywhere;
159
# The test suite is not run as part of the main build (it is enabled through
# passthru.tests instead, because of the cyclic dependency with dask-expr).
doCheck = false;

# NOTE(review): presumably permits local networking inside the Darwin
# sandbox for the tests — confirm against the nixpkgs manual.
__darwinAllowLocalNetworking = true;

# Modules that must be importable with only the base dependencies.
pythonImportsCheck = [
  "dask"
  "dask.bag"
  "dask.bytes"
  "dask.diagnostics"
];
170
# The test suite is exposed as a passthru test rather than run in the main
# build, to avoid the cyclic dependency with dask-expr.
passthru.tests.check = self.overridePythonAttrs (prev: {
  doCheck = true;

  # With checks enabled, `nativeCheckInputs` supplies the `array` and
  # `dataframe` optional dependencies, so these modules can be imported too.
  pythonImportsCheck = [
    # needs dask.optional-dependencies.array
    "dask.array"
    # needs dask.optional-dependencies.dataframe
    "dask.dataframe"
    "dask.dataframe.io"
    "dask.dataframe.tseries"
  ] ++ prev.pythonImportsCheck;
});
185
# Package metadata.
meta = {
  description = "Minimal task scheduling abstraction";
  homepage = "https://dask.org/";
  changelog = "https://docs.dask.org/en/latest/changelog.html";
  license = lib.licenses.bsd3;
  # Executable selected when the package is run directly.
  mainProgram = "dask";
  maintainers = [ lib.maintainers.GaetanLepage ];
};
194 };
195in
196self