# Package expression for `tensorflow-datasets`.
#
# Takes the build helpers and every Python/native package it needs as function
# arguments and returns the result of `buildPythonPackage`. The source is
# fetched from the `tensorflow/datasets` GitHub repository at the tag matching
# `version`.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,

  # dependencies
  array-record,
  dill,
  dm-tree,
  future,
  immutabledict,
  importlib-resources,
  numpy,
  promise,
  protobuf,
  psutil,
  requests,
  simple-parsing,
  six,
  tensorflow-metadata,
  termcolor,
  tqdm,

  # tests
  apache-beam,
  beautifulsoup4,
  click,
  cloudpickle,
  datasets,
  ffmpeg,
  imagemagick,
  jax,
  jaxlib,
  jinja2,
  langdetect,
  lxml,
  matplotlib,
  mlcroissant,
  mwparserfromhell,
  mwxml,
  networkx,
  nltk,
  opencv4,
  pandas,
  pillow,
  pycocotools,
  pydub,
  pytest-xdist,
  pytestCheckHook,
  scikit-image,
  scipy,
  sortedcontainers,
  tensorflow,
  tifffile,
  zarr,
}:

# `rec` lets `src.tag` and `meta.changelog` refer to `version` below.
buildPythonPackage rec {
  pname = "tensorflow-datasets";
  version = "4.9.9";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "tensorflow";
    repo = "datasets";
    tag = "v${version}";
    hash = "sha256-ZXaPYmj8aozfe6ygzKybId8RZ1TqPuIOSpd8XxnRHus=";
  };

  # Python packages listed under "dependencies" in the argument set above.
  dependencies = [
    array-record
    dill
    dm-tree
    future
    immutabledict
    importlib-resources
    numpy
    promise
    protobuf
    psutil
    requests
    simple-parsing
    six
    tensorflow-metadata
    termcolor
    tqdm
  ];

  pythonImportsCheck = [ "tensorflow_datasets" ];

  # Packages listed under "tests" in the argument set above; the test suite is
  # driven by `pytestCheckHook`.
  nativeCheckInputs = [
    apache-beam
    beautifulsoup4
    click
    cloudpickle
    datasets
    ffmpeg
    imagemagick
    jax
    jaxlib
    jinja2
    langdetect
    lxml
    matplotlib
    mlcroissant
    mwparserfromhell
    mwxml
    networkx
    nltk
    opencv4
    pandas
    pillow
    pycocotools
    pydub
    pytest-xdist
    pytestCheckHook
    scikit-image
    scipy
    sortedcontainers
    tensorflow
    tifffile
    zarr
  ];

  # Individual test node IDs deselected via pytest's `--deselect`.
  # `pytestFlags` replaces the deprecated `pytestFlagsArray`; these flags
  # contain no whitespace or shell syntax, so they are passed through unchanged.
  pytestFlags = [
    # AttributeError: 'NoneType' object has no attribute 'Table'
    "--deselect=tensorflow_datasets/core/file_adapters_test.py::test_read_write"
    "--deselect=tensorflow_datasets/text/c4_wsrs/c4_wsrs_test.py::C4WSRSTest"
  ];

  # Tests disabled by name.
  disabledTests = [
    # Since updating apache-beam to 2.65.0
    # RuntimeError: Unable to pickle fn CallableWrapperDoFn...: maximum recursion depth exceeded
    # https://github.com/tensorflow/datasets/issues/11055
    "test_download_and_prepare_as_dataset"
  ];

  # Whole files excluded from the test run, grouped by the reason they fail or
  # cannot run.
  disabledTestPaths = [
    # Sandbox violations: network access, filesystem write attempts outside of build dir, ...
    "tensorflow_datasets/core/dataset_builder_test.py"
    "tensorflow_datasets/core/dataset_info_test.py"
    "tensorflow_datasets/core/features/features_test.py"
    "tensorflow_datasets/core/github_api/github_path_test.py"
    "tensorflow_datasets/core/registered_test.py"
    "tensorflow_datasets/core/utils/gcs_utils_test.py"
    "tensorflow_datasets/import_without_tf_test.py"
    "tensorflow_datasets/proto/build_tf_proto_test.py"
    "tensorflow_datasets/scripts/cli/build_test.py"
    "tensorflow_datasets/datasets/imagenet2012_corrupted/imagenet2012_corrupted_dataset_builder_test.py"

    # Requires `pretty_midi` which is not packaged in `nixpkgs`.
    "tensorflow_datasets/audio/groove.py"
    "tensorflow_datasets/datasets/groove/groove_dataset_builder_test.py"

    # Requires `crepe` which is not packaged in `nixpkgs`.
    "tensorflow_datasets/audio/nsynth.py"
    "tensorflow_datasets/datasets/nsynth/nsynth_dataset_builder_test.py"

    # Requires `conllu` which is not packaged in `nixpkgs`.
    "tensorflow_datasets/core/dataset_builders/conll/conllu_dataset_builder_test.py"
    "tensorflow_datasets/datasets/universal_dependencies/universal_dependencies_dataset_builder_test.py"
    "tensorflow_datasets/datasets/xtreme_pos/xtreme_pos_dataset_builder_test.py"

    # Requires `gcld3` and `pretty_midi` which are not packaged in `nixpkgs`.
    "tensorflow_datasets/core/lazy_imports_lib_test.py"

    # AttributeError: 'NoneType' object has no attribute 'Table'
    "tensorflow_datasets/core/dataset_builder_beam_test.py"
    "tensorflow_datasets/core/dataset_builders/adhoc_builder_test.py"
    "tensorflow_datasets/core/split_builder_test.py"
    "tensorflow_datasets/core/writer_test.py"

    # Requires `tensorflow_io` which is not packaged in `nixpkgs`.
    "tensorflow_datasets/core/features/audio_feature_test.py"
    "tensorflow_datasets/image/lsun_test.py"

    # Fails with `TypeError: Constant constructor takes either 0 or 2 positional arguments`
    # deep in TF AutoGraph. Doesn't reproduce in Docker with Ubuntu 22.04 => might be related
    # to the differences in some of the dependencies?
    "tensorflow_datasets/rl_unplugged/rlu_atari/rlu_atari_test.py"

    # Fails with `ValueError: setting an array element with a sequence`
    "tensorflow_datasets/core/dataset_utils_test.py"
    "tensorflow_datasets/core/features/sequence_feature_test.py"

    # Requires `tensorflow_docs` which is not packaged in `nixpkgs` and the test is for documentation anyway.
    "tensorflow_datasets/scripts/documentation/build_api_docs_test.py"

    # Not a test, should not be executed.
    "tensorflow_datasets/testing/test_utils.py"

    # Require `gcld3` and `nltk.punkt` which are not packaged in `nixpkgs`.
    "tensorflow_datasets/text/c4_test.py"
    "tensorflow_datasets/text/c4_utils_test.py"
  ];

  meta = {
    description = "Library of datasets ready to use with TensorFlow";
    homepage = "https://www.tensorflow.org/datasets/overview";
    changelog = "https://github.com/tensorflow/datasets/releases/tag/v${version}";
    license = lib.licenses.asl20;
    maintainers = with lib.maintainers; [ ndl ];
  };
}