# Nix packaging of the dask Python library, built from the upstream GitHub tag
# that matches `version` and tested with pytest.
{
  lib,
  stdenv,
  buildPythonPackage,
  fetchFromGitHub,

  # build-system
  setuptools,
  wheel,

  # dependencies
  click,
  cloudpickle,
  fsspec,
  importlib-metadata,
  packaging,
  partd,
  pyyaml,
  toolz,

  # optional-dependencies
  numpy,
  pyarrow,
  lz4,
  pandas,
  distributed,
  bokeh,
  jinja2,

  # tests
  arrow-cpp,
  hypothesis,
  pytest-asyncio,
  pytest-rerunfailures,
  pytest-xdist,
  pytestCheckHook,
  pythonOlder,
}:

let
  pname = "dask";
  version = "2023.8.0";

  # Individual optional-dependency groups. Kept in a plain (non-recursive)
  # attribute set so that `distributed` on the right-hand side still refers to
  # the package argument rather than to the group of the same name.
  extras = {
    array = [
      numpy
    ];
    dataframe = [
      numpy
      pandas
    ];
    distributed = [
      distributed
    ];
    diagnostics = [
      bokeh
      jinja2
    ];
  };

  # Tests that are skipped on Darwin only.
  darwinOnlyDisabledTests = [
    # Test requires features of python3Packages.psutil that are
    # blocked in sandboxed-builds
    "test_auto_blocksize_csv"
    # AttributeError: 'str' object has no attribute 'decode'
    "test_read_dir_nometa"
  ];

  # Tests that are skipped on every platform.
  commonDisabledTests = [
    # https://github.com/dask/dask/issues/10347#issuecomment-1589683941
    "test_concat_categorical"
    # AttributeError: 'ArrowStringArray' object has no attribute 'tobytes'. Did you mean: 'nbytes'?
    "test_dot"
    "test_dot_nan"
    "test_merge_column_with_nulls"
  ];
in
buildPythonPackage {
  inherit pname version;
  format = "pyproject";

  disabled = pythonOlder "3.8";

  src = fetchFromGitHub {
    owner = "dask";
    repo = "dask";
    rev = "refs/tags/" + version;
    hash = "sha256-ZKjfxTJCu3EUOKz16+VP8+cPqQliFNc7AU1FPC1gOXw=";
  };

  nativeBuildInputs = [
    setuptools
    wheel
  ];

  propagatedBuildInputs = [
    click
    cloudpickle
    fsspec
    packaging
    partd
    pyyaml
    importlib-metadata
    toolz
  ];

  # `complete` is the two extra packages followed by every other group, in
  # this fixed order: array, dataframe, distributed, diagnostics.
  passthru.optional-dependencies = extras // {
    complete =
      [
        pyarrow
        lz4
      ]
      ++ extras.array
      ++ extras.dataframe
      ++ extras.distributed
      ++ extras.diagnostics;
  };

  nativeCheckInputs =
    [
      pytestCheckHook
      pytest-rerunfailures
      pytest-xdist
      # from pandas[test]
      hypothesis
      pytest-asyncio
    ]
    # pyarrow is only added when arrow-cpp is not marked broken;
    # support is sparse on aarch64
    ++ lib.optional (!arrow-cpp.meta.broken) pyarrow;

  dontUseSetuptoolsCheck = true;

  # Removes the versioneer dependency by hard-coding the package version, and
  # strips the duration, coverage and verbose options from the pytest
  # configuration in pyproject.toml.
  postPatch = ''
    # versioneer hack to set version of GitHub package
    echo "def get_versions(): return {'dirty': False, 'error': None, 'full-revisionid': None, 'version': '${version}'}" > dask/_version.py

    substituteInPlace setup.py \
      --replace "import versioneer" "" \
      --replace "version=versioneer.get_version()," "version='${version}'," \
      --replace "cmdclass=versioneer.get_cmdclass()," ""

    substituteInPlace pyproject.toml \
      --replace ', "versioneer[toml]==0.28"' "" \
      --replace " --durations=10" "" \
      --replace " --cov-config=pyproject.toml" "" \
      --replace "\"-v" "\" "
  '';

  pytestFlagsArray = [
    # Rerun failed tests up to three times
    "--reruns 3"
    # Don't run tests that require network access
    "-m 'not network'"
  ];

  # Darwin-specific entries come first, followed by the platform-independent ones.
  disabledTests =
    lib.optionals stdenv.hostPlatform.isDarwin darwinOnlyDisabledTests ++ commonDisabledTests;

  __darwinAllowLocalNetworking = true;

  pythonImportsCheck = [
    "dask"
    "dask.array"
    "dask.bag"
    "dask.bytes"
    "dask.dataframe"
    "dask.dataframe.io"
    "dask.dataframe.tseries"
    "dask.diagnostics"
  ];

  meta = {
    description = "Minimal task scheduling abstraction";
    homepage = "https://dask.org/";
    changelog = "https://docs.dask.org/en/latest/changelog.html";
    license = lib.licenses.bsd3;
    maintainers = [ lib.maintainers.fridh ];
  };
}