# Package definition for the `tensorflow-datasets` Python library.
#
# The expression is a function from its dependencies (supplied by the caller,
# typically through `callPackage`) to a `buildPythonPackage` derivation built
# from the upstream GitHub tag matching `version`.
{
  apache-beam,
  array-record,
  attrs,
  beautifulsoup4,
  buildPythonPackage,
  click,
  datasets,
  dill,
  dm-tree,
  fetchFromGitHub,
  ffmpeg,
  future,
  imagemagick,
  importlib-resources,
  jax,
  jaxlib,
  jinja2,
  langdetect,
  lib,
  lxml,
  matplotlib,
  mwparserfromhell,
  mwxml,
  networkx,
  nltk,
  numpy,
  opencv4,
  pandas,
  pillow,
  promise,
  protobuf,
  psutil,
  pycocotools,
  pydub,
  pytest-xdist,
  pytestCheckHook,
  requests,
  scikit-image,
  scipy,
  six,
  tensorflow,
  tensorflow-metadata,
  termcolor,
  tifffile,
  tqdm,
  zarr,
}:

# `rec` lets `src.rev` refer to `version` declared in the same attribute set.
buildPythonPackage rec {
  pname = "tensorflow-datasets";
  version = "4.9.4";
  format = "setuptools";

  # Sources are fetched from the upstream release tag `v<version>`.
  src = fetchFromGitHub {
    owner = "tensorflow";
    repo = "datasets";
    rev = "refs/tags/v${version}";
    hash = "sha256-HY/atBEWeEJgBNxEapq9jPFoZbFof2AHEDAiJa/lYAE=";
  };

  patches = [
    # addresses https://github.com/tensorflow/datasets/issues/3673
    ./corruptions.patch
  ];

  # Python dependencies propagated to anything that depends on this package.
  propagatedBuildInputs = [
    array-record
    attrs
    dill
    dm-tree
    future
    importlib-resources
    numpy
    promise
    protobuf
    psutil
    requests
    six
    tensorflow-metadata
    termcolor
    tqdm
  ];

  # Modules that must be importable from the built package.
  pythonImportsCheck = [ "tensorflow_datasets" ];

  # Dependencies needed only to run the test suite (`pytestCheckHook` runs it,
  # `pytest-xdist` is made available alongside it); they are not propagated.
  nativeCheckInputs = [
    apache-beam
    beautifulsoup4
    click
    datasets
    ffmpeg
    imagemagick
    jax
    jaxlib
    jinja2
    langdetect
    lxml
    matplotlib
    mwparserfromhell
    mwxml
    networkx
    nltk
    opencv4
    pandas
    pillow
    pycocotools
    pydub
    pytest-xdist
    pytestCheckHook
    scikit-image
    scipy
    tensorflow
    tifffile
    zarr
  ];

  # Paths excluded from the test run, grouped by the reason they cannot run here.
  disabledTestPaths = [
    # Sandbox violations: network access, filesystem write attempts outside of build dir, ...
    "tensorflow_datasets/core/dataset_builder_test.py"
    "tensorflow_datasets/core/dataset_info_test.py"
    "tensorflow_datasets/core/features/features_test.py"
    "tensorflow_datasets/core/github_api/github_path_test.py"
    "tensorflow_datasets/core/registered_test.py"
    "tensorflow_datasets/core/utils/gcs_utils_test.py"
    "tensorflow_datasets/import_without_tf_test.py"
    "tensorflow_datasets/proto/build_tf_proto_test.py"
    "tensorflow_datasets/scripts/cli/build_test.py"

    # Requires `pretty_midi` which is not packaged in `nixpkgs`.
    "tensorflow_datasets/audio/groove.py"
    "tensorflow_datasets/datasets/groove/groove_dataset_builder_test.py"

    # Requires `crepe` which is not packaged in `nixpkgs`.
    "tensorflow_datasets/audio/nsynth.py"
    "tensorflow_datasets/datasets/nsynth/nsynth_dataset_builder_test.py"

    # Requires `conllu` which is not packaged in `nixpkgs`.
    "tensorflow_datasets/core/dataset_builders/conll/conllu_dataset_builder_test.py"
    "tensorflow_datasets/datasets/universal_dependencies/universal_dependencies_dataset_builder_test.py"
    "tensorflow_datasets/datasets/xtreme_pos/xtreme_pos_dataset_builder_test.py"

    # Requires `gcld3` and `pretty_midi` which are not packaged in `nixpkgs`.
    "tensorflow_datasets/core/lazy_imports_lib_test.py"

    # Requires `tensorflow_io` which is not packaged in `nixpkgs`.
    "tensorflow_datasets/core/features/audio_feature_test.py"
    "tensorflow_datasets/image/lsun_test.py"

    # Requires `envlogger` which is not packaged in `nixpkgs`.
    "tensorflow_datasets/rlds/locomotion/locomotion_test.py"
    "tensorflow_datasets/rlds/robosuite_panda_pick_place_can/robosuite_panda_pick_place_can_test.py"

    # Fails with `TypeError: Constant constructor takes either 0 or 2 positional arguments`
    # deep in TF AutoGraph. Doesn't reproduce in Docker with Ubuntu 22.04 => might be related
    # to the differences in some of the dependencies?
    "tensorflow_datasets/rl_unplugged/rlu_atari/rlu_atari_test.py"

    # Fails with `ValueError: setting an array element with a sequence`
    "tensorflow_datasets/core/dataset_utils_test.py"
    "tensorflow_datasets/core/features/sequence_feature_test.py"

    # Requires `tensorflow_docs` which is not packaged in `nixpkgs` and the test is for documentation anyway.
    "tensorflow_datasets/scripts/documentation/build_api_docs_test.py"

    # Not a test, should not be executed.
    "tensorflow_datasets/testing/test_utils.py"

    # Requires `gcld3` and `nltk.punkt` which are not packaged in `nixpkgs`.
    "tensorflow_datasets/text/c4_test.py"
    "tensorflow_datasets/text/c4_utils_test.py"
  ];

  # `lib` is referenced explicitly instead of `with lib;` so that every name
  # in this set has an obvious origin; the resulting values are unchanged.
  meta = {
    description = "Library of datasets ready to use with TensorFlow";
    homepage = "https://www.tensorflow.org/datasets/overview";
    license = lib.licenses.asl20;
    maintainers = with lib.maintainers; [ ndl ];
  };
}