1{
2 lib,
3 stdenv,
4 buildPythonPackage,
5 fetchFromGitHub,
6
7 # build-system
8 setuptools,
9
10 # dependencies
11 click,
12 cloudpickle,
13 fsspec,
14 importlib-metadata,
15 packaging,
16 partd,
17 pyyaml,
18 toolz,
19
20 # optional-dependencies
21 numpy,
22 pyarrow,
23 lz4,
24 pandas,
25 distributed,
26 bokeh,
27 jinja2,
28
29 # tests
30 arrow-cpp,
31 dask-expr,
32 hypothesis,
33 pytest-asyncio,
34 pytest-rerunfailures,
35 pytest-xdist,
36 pytestCheckHook,
37}:
38
39let
40 self = buildPythonPackage rec {
# Package identity; `version` is reused below for the source tag.
pname = "dask";
version = "2024.11.2";

# pyproject-based build; setuptools is the only build-system input.
pyproject = true;
build-system = [ setuptools ];

# Sources are fetched from GitHub at the tag named after `version`.
src = fetchFromGitHub {
  repo = "dask";
  owner = "dask";
  rev = "refs/tags/" + version;
  hash = "sha256-mAdjsfXzHGJ37m4nQbi+A+4qrL/CHcQNuoGaeU9Nwwo=";
};

# Dependencies that are always present (extras are declared separately).
dependencies = [
  click
  cloudpickle
  fsspec
  packaging
  partd
  pyyaml
  importlib-metadata
  toolz
];
64
# Extras, expressed as a fixed point so that `complete` can be assembled from
# the other groups. The fixpoint argument is called `extras` to keep it
# distinct from the `let`-bound `self` (the package itself).
optional-dependencies = lib.fix (extras: {
  array = [ numpy ];

  dataframe = [
    # dask-expr is deliberately absent: listing it here would create a
    # circular dependency with dask-expr
    numpy
    pandas
  ];

  diagnostics = [
    bokeh
    jinja2
  ];

  distributed = [ distributed ];

  # Everything above, plus pyarrow and lz4.
  complete =
    [
      pyarrow
      lz4
    ]
    ++ extras.array
    ++ extras.dataframe
    ++ extras.distributed
    ++ extras.diagnostics;
});
82
# Inputs for the test run: the test tooling itself, the `array` and
# `dataframe` extras, and pyarrow unless arrow-cpp is marked broken.
nativeCheckInputs = lib.concatLists [
  [
    dask-expr
    pytestCheckHook
    pytest-rerunfailures
    pytest-xdist
    # from pandas[test]
    hypothesis
    pytest-asyncio
  ]
  self.optional-dependencies.array
  self.optional-dependencies.dataframe
  # support is sparse on aarch64
  (lib.optional (!arrow-cpp.meta.broken) pyarrow)
];

dontUseSetuptoolsCheck = true;
101
# Source patching applied before the build. The script below:
#   - overwrites dask/_version.py with a stub `get_versions()` that reports
#     `version` (the same value used for the source tag);
#   - edits setup.py to drop `import versioneer`, hard-code the version and
#     remove the versioneer cmdclass;
#   - edits pyproject.toml to drop the `versioneer[toml]==0.29` requirement,
#     the ` --durations=10` and ` --cov-config=pyproject.toml` options, and
#     a `-v` that directly follows an opening double quote.
# Every substitution uses `--replace-fail`.
# NOTE(review): presumably so the build aborts when upstream changes one of
# these patterns instead of silently skipping the edit — confirm.
postPatch = ''
  # versioneer hack to set version of GitHub package
  echo "def get_versions(): return {'dirty': False, 'error': None, 'full-revisionid': None, 'version': '${version}'}" > dask/_version.py

  substituteInPlace setup.py \
  --replace-fail "import versioneer" "" \
  --replace-fail "version=versioneer.get_version()," "version='${version}'," \
  --replace-fail "cmdclass=versioneer.get_cmdclass()," ""

  substituteInPlace pyproject.toml \
  --replace-fail ', "versioneer[toml]==0.29"' "" \
  --replace-fail " --durations=10" "" \
  --replace-fail " --cov-config=pyproject.toml" "" \
  --replace-fail "\"-v" "\" "
'';
117
pytestFlagsArray = [
  # Retry a failing test up to three times
  "--reruns 3"
  # Skip tests that require network access
  "-m 'not network'"
];

# Tests excluded from the run, grouped by the platform they fail on.
disabledTests =
  let
    inherit (stdenv.hostPlatform) isDarwin isAarch64;
  in
  lib.optionals isDarwin [
    # Needs python3Packages.psutil features that are blocked in
    # sandboxed builds
    "test_auto_blocksize_csv"
    # AttributeError: 'str' object has no attribute 'decode'
    "test_read_dir_nometa"
  ]
  ++ lib.optionals (isDarwin && isAarch64) [
    # concurrent.futures.process.BrokenProcessPool: A process in the process pool terminated abruptly...
    "test_foldby_tree_reduction"
    "test_to_bag"
  ]
  ++ [
    # https://github.com/dask/dask/issues/10347#issuecomment-1589683941
    "test_concat_categorical"
    # AttributeError: 'ArrowStringArray' object has no attribute 'tobytes'. Did you mean: 'nbytes'?
    "test_dot"
    "test_dot_nan"
    "test_merge_column_with_nulls"
    # FileNotFoundError: [Errno 2] No such file or directory: '/build/tmp301jryv_/createme/0.part'
    "test_to_csv_nodir"
    "test_to_json_results"
    # FutureWarning; these tests are expected to pass once pandas is upgraded to 2.1.1
    "test_apply"
    "test_apply_infer_columns"
  ];
152
__darwinAllowLocalNetworking = true;

# The check phase is disabled for the package itself; it is re-enabled only
# in `passthru.tests.check` below.
doCheck = false;

# Modules importable with just the base dependencies.
pythonImportsCheck = [
  "dask"
  "dask.bag"
  "dask.bytes"
  "dask.diagnostics"
];

# Tests are exposed through passthru to avoid the cyclic dependency with
# dask-expr.
passthru.tests.check = self.overridePythonAttrs (prev: {
  doCheck = true;
  pythonImportsCheck = [
    # Needs `dask.optional-dependencies.array`, which is only available via
    # `nativeCheckInputs`
    "dask.array"
    # Need `dask.optional-dependencies.dataframe`, which is only available
    # via `nativeCheckInputs`
    "dask.dataframe"
    "dask.dataframe.io"
    "dask.dataframe.tseries"
  ] ++ prev.pythonImportsCheck;
});
178
# Package metadata.
meta = {
  description = "Minimal task scheduling abstraction";
  homepage = "https://dask.org/";
  changelog = "https://docs.dask.org/en/latest/changelog.html";
  license = lib.licenses.bsd3;
  mainProgram = "dask";
  maintainers = [ lib.maintainers.GaetanLepage ];
};
187 };
188in
189self