# Package expression for `tensorflow-datasets`, built with `buildPythonPackage`
# from the upstream GitHub repository (tensorflow/datasets).
#
# The function arguments are grouped by how the expression uses them; the
# order of names inside a set pattern has no effect on evaluation.
{
  # Build helpers.
  lib,
  buildPythonPackage,
  fetchFromGitHub,

  # Listed in `propagatedBuildInputs`.
  attrs,
  dill,
  dm-tree,
  future,
  importlib-resources,
  numpy,
  promise,
  protobuf,
  psutil,
  requests,
  six,
  tensorflow-metadata,
  termcolor,
  tqdm,

  # Listed in `nativeCheckInputs` (used by the test suite only).
  apache-beam,
  beautifulsoup4,
  click,
  datasets,
  ffmpeg,
  imagemagick,
  jax,
  jaxlib,
  jinja2,
  langdetect,
  lxml,
  matplotlib,
  mwparserfromhell,
  networkx,
  nltk,
  opencv4,
  pandas,
  pillow,
  pycocotools,
  pydub,
  pytest-xdist,
  pytestCheckHook,
  scikit-image,
  scipy,
  tensorflow,
  tifffile,
  zarr,
}:

let
  pname = "tensorflow-datasets";
  # Also selects the upstream git tag fetched below (`v<version>`).
  version = "4.8.2";
in
buildPythonPackage {
  inherit pname version;

  src = fetchFromGitHub {
    owner = "tensorflow";
    repo = "datasets";
    rev = "refs/tags/v${version}";
    hash = "sha256-FYFk53WKNQTSrnGGiA6cn9LffbMJkZtjlGuOF52Og7c=";
  };

  patches = [
    # Addresses https://github.com/tensorflow/datasets/issues/3673
    ./corruptions.patch
  ];

  propagatedBuildInputs = [
    attrs
    dill
    dm-tree
    future
    importlib-resources
    numpy
    promise
    protobuf
    psutil
    requests
    six
    tensorflow-metadata
    termcolor
    tqdm
  ];

  nativeCheckInputs = [
    apache-beam
    beautifulsoup4
    click
    datasets
    ffmpeg
    imagemagick
    jax
    jaxlib
    jinja2
    langdetect
    lxml
    matplotlib
    mwparserfromhell
    networkx
    nltk
    opencv4
    pandas
    pillow
    pycocotools
    pydub
    pytest-xdist
    pytestCheckHook
    scikit-image
    scipy
    tensorflow
    tifffile
    zarr
  ];

  # Smoke test: the top-level module must be importable after installation.
  pythonImportsCheck = [
    "tensorflow_datasets"
  ];

  # Paths excluded from the pytest run, grouped by the reason for exclusion.
  disabledTestPaths = [
    # Sandbox violations: network access, attempts to write outside of the
    # build directory, ...
    "tensorflow_datasets/core/dataset_builder_test.py"
    "tensorflow_datasets/core/dataset_info_test.py"
    "tensorflow_datasets/core/features/features_test.py"
    "tensorflow_datasets/core/github_api/github_path_test.py"
    "tensorflow_datasets/core/registered_test.py"
    "tensorflow_datasets/core/utils/gcs_utils_test.py"
    "tensorflow_datasets/import_without_tf_test.py"
    "tensorflow_datasets/scripts/cli/build_test.py"

    # Needs `pretty_midi`, which is not packaged in `nixpkgs`.
    "tensorflow_datasets/audio/groove.py"
    "tensorflow_datasets/datasets/groove/groove_dataset_builder_test.py"

    # Needs `crepe`, which is not packaged in `nixpkgs`.
    "tensorflow_datasets/audio/nsynth.py"
    "tensorflow_datasets/datasets/nsynth/nsynth_dataset_builder_test.py"

    # Needs `conllu`, which is not packaged in `nixpkgs`.
    "tensorflow_datasets/core/dataset_builders/conll/conllu_dataset_builder_test.py"
    "tensorflow_datasets/datasets/universal_dependencies/universal_dependencies_dataset_builder_test.py"
    "tensorflow_datasets/datasets/xtreme_pos/xtreme_pos_dataset_builder_test.py"

    # Needs `gcld3` and `pretty_midi`, which are not packaged in `nixpkgs`.
    "tensorflow_datasets/core/lazy_imports_lib_test.py"

    # Needs `tensorflow_io`, which is not packaged in `nixpkgs`.
    "tensorflow_datasets/core/features/audio_feature_test.py"
    "tensorflow_datasets/image/lsun_test.py"

    # Needs `envlogger`, which is not packaged in `nixpkgs`.
    "tensorflow_datasets/rlds/locomotion/locomotion_test.py"
    "tensorflow_datasets/rlds/robosuite_panda_pick_place_can/robosuite_panda_pick_place_can_test.py"

    # Fails with `TypeError: Constant constructor takes either 0 or 2 positional arguments`
    # deep inside TF AutoGraph. Does not reproduce in Docker with Ubuntu 22.04,
    # so it might be caused by differences in some of the dependencies.
    "tensorflow_datasets/rl_unplugged/rlu_atari/rlu_atari_test.py"

    # Fails with `ValueError: setting an array element with a sequence`.
    "tensorflow_datasets/core/dataset_utils_test.py"
    "tensorflow_datasets/core/features/sequence_feature_test.py"

    # Needs `tensorflow_docs`, which is not packaged in `nixpkgs`; the test
    # only covers documentation generation anyway.
    "tensorflow_datasets/scripts/documentation/build_api_docs_test.py"

    # Not a test module; must not be executed.
    "tensorflow_datasets/testing/test_utils.py"

    # Need `gcld3` and `nltk.punkt`, which are not packaged in `nixpkgs`.
    "tensorflow_datasets/text/c4_test.py"
    "tensorflow_datasets/text/c4_utils_test.py"
  ];

  meta = {
    description = "Library of datasets ready to use with TensorFlow";
    homepage = "https://www.tensorflow.org/datasets/overview";
    license = lib.licenses.asl20;
    maintainers = [ lib.maintainers.ndl ];
  };
}