# Python package definition for `tensorflow-datasets`, built from the upstream
# GitHub sources via `buildPythonPackage`.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,

  # runtime dependencies (propagated to consumers)
  array-record,
  attrs,
  dill,
  dm-tree,
  future,
  importlib-resources,
  numpy,
  promise,
  protobuf,
  psutil,
  requests,
  six,
  tensorflow-metadata,
  termcolor,
  tqdm,

  # dependencies that are only needed while running the test suite
  apache-beam,
  beautifulsoup4,
  click,
  datasets,
  ffmpeg,
  imagemagick,
  jax,
  jaxlib,
  jinja2,
  langdetect,
  lxml,
  matplotlib,
  mwparserfromhell,
  mwxml,
  networkx,
  nltk,
  opencv4,
  pandas,
  pillow,
  pycocotools,
  pydub,
  pytest-xdist,
  pytestCheckHook,
  scikit-image,
  scipy,
  tensorflow,
  tifffile,
  zarr,
}:

let
  pname = "tensorflow-datasets";
  version = "4.9.3";
in
buildPythonPackage {
  inherit pname version;

  # Sources are fetched from the upstream release tag matching `version`.
  src = fetchFromGitHub {
    owner = "tensorflow";
    repo = "datasets";
    rev = "refs/tags/v${version}";
    hash = "sha256-ZXCcXChrWqs0FAK5Fe8cD+MuJpWa9Dwo/ny5fOX2lKU=";
  };

  patches = [
    # Local fix for https://github.com/tensorflow/datasets/issues/3673
    ./corruptions.patch
  ];

  propagatedBuildInputs = [
    array-record
    attrs
    dill
    dm-tree
    future
    importlib-resources
    numpy
    promise
    protobuf
    psutil
    requests
    six
    tensorflow-metadata
    termcolor
    tqdm
  ];

  # Smoke test: the top-level module must be importable after installation.
  pythonImportsCheck = [ "tensorflow_datasets" ];

  nativeCheckInputs = [
    apache-beam
    beautifulsoup4
    click
    datasets
    ffmpeg
    imagemagick
    jax
    jaxlib
    jinja2
    langdetect
    lxml
    matplotlib
    mwparserfromhell
    mwxml
    networkx
    nltk
    opencv4
    pandas
    pillow
    pycocotools
    pydub
    pytest-xdist
    pytestCheckHook
    scikit-image
    scipy
    tensorflow
    tifffile
    zarr
  ];

  # Paths excluded from the test run, grouped by the reason for exclusion.
  disabledTestPaths = [
    # Violate the build sandbox (network access, writes outside of the build
    # directory, ...).
    "tensorflow_datasets/core/dataset_builder_test.py"
    "tensorflow_datasets/core/dataset_info_test.py"
    "tensorflow_datasets/core/features/features_test.py"
    "tensorflow_datasets/core/github_api/github_path_test.py"
    "tensorflow_datasets/core/registered_test.py"
    "tensorflow_datasets/core/utils/gcs_utils_test.py"
    "tensorflow_datasets/import_without_tf_test.py"
    "tensorflow_datasets/proto/build_tf_proto_test.py"
    "tensorflow_datasets/scripts/cli/build_test.py"

    # Need `pretty_midi`, which is not packaged in `nixpkgs`.
    "tensorflow_datasets/audio/groove.py"
    "tensorflow_datasets/datasets/groove/groove_dataset_builder_test.py"

    # Need `crepe`, which is not packaged in `nixpkgs`.
    "tensorflow_datasets/audio/nsynth.py"
    "tensorflow_datasets/datasets/nsynth/nsynth_dataset_builder_test.py"

    # Need `conllu`, which is not packaged in `nixpkgs`.
    "tensorflow_datasets/core/dataset_builders/conll/conllu_dataset_builder_test.py"
    "tensorflow_datasets/datasets/universal_dependencies/universal_dependencies_dataset_builder_test.py"
    "tensorflow_datasets/datasets/xtreme_pos/xtreme_pos_dataset_builder_test.py"

    # Needs `gcld3` and `pretty_midi`, neither of which is packaged in `nixpkgs`.
    "tensorflow_datasets/core/lazy_imports_lib_test.py"

    # Need `tensorflow_io`, which is not packaged in `nixpkgs`.
    "tensorflow_datasets/core/features/audio_feature_test.py"
    "tensorflow_datasets/image/lsun_test.py"

    # Need `envlogger`, which is not packaged in `nixpkgs`.
    "tensorflow_datasets/rlds/locomotion/locomotion_test.py"
    "tensorflow_datasets/rlds/robosuite_panda_pick_place_can/robosuite_panda_pick_place_can_test.py"

    # Raises `TypeError: Constant constructor takes either 0 or 2 positional arguments`
    # from deep inside TF AutoGraph. Not reproducible in Docker with Ubuntu 22.04,
    # so it might be caused by differences in some of the dependencies.
    "tensorflow_datasets/rl_unplugged/rlu_atari/rlu_atari_test.py"

    # Raise `ValueError: setting an array element with a sequence`.
    "tensorflow_datasets/core/dataset_utils_test.py"
    "tensorflow_datasets/core/features/sequence_feature_test.py"

    # Needs `tensorflow_docs`, which is not packaged in `nixpkgs`; the test only
    # covers documentation anyway.
    "tensorflow_datasets/scripts/documentation/build_api_docs_test.py"

    # A helper module rather than a test; it must not be executed.
    "tensorflow_datasets/testing/test_utils.py"

    # Need `gcld3` and `nltk.punkt`, neither of which is packaged in `nixpkgs`.
    "tensorflow_datasets/text/c4_test.py"
    "tensorflow_datasets/text/c4_utils_test.py"
  ];

  meta = {
    description = "Library of datasets ready to use with TensorFlow";
    homepage = "https://www.tensorflow.org/datasets/overview";
    license = lib.licenses.asl20;
    maintainers = with lib.maintainers; [ ndl ];
  };
}