{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  fetchpatch,

  # dependencies
  array-record,
  dill,
  dm-tree,
  future,
  immutabledict,
  importlib-resources,
  numpy,
  promise,
  protobuf,
  psutil,
  requests,
  simple-parsing,
  six,
  tensorflow-metadata,
  termcolor,
  tqdm,

  # tests
  apache-beam,
  beautifulsoup4,
  click,
  datasets,
  ffmpeg,
  imagemagick,
  jax,
  jaxlib,
  jinja2,
  langdetect,
  lxml,
  matplotlib,
  mlcroissant,
  mwparserfromhell,
  mwxml,
  networkx,
  nltk,
  opencv4,
  pandas,
  pillow,
  pycocotools,
  pydub,
  pytest-xdist,
  pytestCheckHook,
  scikit-image,
  scipy,
  sortedcontainers,
  tensorflow,
  tifffile,
  zarr,
}:

buildPythonPackage rec {
  pname = "tensorflow-datasets";
  version = "4.9.8";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "tensorflow";
    repo = "datasets";
    tag = "v${version}";
    hash = "sha256-nqveZ+8b0f5sGIn6WufKeA37yEsZjzhCIbCfwMZ9JOM=";
  };

  patches = [
    # mlcroissant uses encoding_formats, not encoding_format.
    # Backport https://github.com/tensorflow/datasets/pull/11037 until released.
    (fetchpatch {
      url = "https://github.com/tensorflow/datasets/commit/92cbcff725a1036569a515cc3356aa8480740451.patch";
      hash = "sha256-2hnMvQP83+eAJllce19aHujcoWQzUz3+LsasWCo4BtM=";
    })
  ];

  dependencies = [
    array-record
    dill
    dm-tree
    future
    immutabledict
    importlib-resources
    numpy
    promise
    protobuf
    psutil
    requests
    simple-parsing
    six
    tensorflow-metadata
    termcolor
    tqdm
  ];

  pythonImportsCheck = [ "tensorflow_datasets" ];

  nativeCheckInputs = [
    apache-beam
    beautifulsoup4
    click
    datasets
    ffmpeg
    imagemagick
    jax
    jaxlib
    jinja2
    langdetect
    lxml
    matplotlib
    mlcroissant
    mwparserfromhell
    mwxml
    networkx
    nltk
    opencv4
    pandas
    pillow
    pycocotools
    pydub
    pytest-xdist
    pytestCheckHook
    scikit-image
    scipy
    sortedcontainers
    tensorflow
    tifffile
    zarr
  ];

  pytestFlagsArray = [
    # AttributeError: 'NoneType' object has no attribute 'Table'
    "--deselect=tensorflow_datasets/core/file_adapters_test.py::test_read_write"
    "--deselect=tensorflow_datasets/text/c4_wsrs/c4_wsrs_test.py::C4WSRSTest"
  ];

  disabledTestPaths = [
    # Sandbox violations: network access, filesystem write attempts outside of build dir, ...
    "tensorflow_datasets/core/dataset_builder_test.py"
    "tensorflow_datasets/core/dataset_info_test.py"
    "tensorflow_datasets/core/features/features_test.py"
    "tensorflow_datasets/core/github_api/github_path_test.py"
    "tensorflow_datasets/core/registered_test.py"
    "tensorflow_datasets/core/utils/gcs_utils_test.py"
    "tensorflow_datasets/import_without_tf_test.py"
    "tensorflow_datasets/proto/build_tf_proto_test.py"
    "tensorflow_datasets/scripts/cli/build_test.py"
    "tensorflow_datasets/datasets/imagenet2012_corrupted/imagenet2012_corrupted_dataset_builder_test.py"

    # Requires `pretty_midi` which is not packaged in `nixpkgs`.
    "tensorflow_datasets/audio/groove.py"
    "tensorflow_datasets/datasets/groove/groove_dataset_builder_test.py"

    # Requires `crepe` which is not packaged in `nixpkgs`.
    "tensorflow_datasets/audio/nsynth.py"
    "tensorflow_datasets/datasets/nsynth/nsynth_dataset_builder_test.py"

    # Requires `conllu` which is not packaged in `nixpkgs`.
    "tensorflow_datasets/core/dataset_builders/conll/conllu_dataset_builder_test.py"
    "tensorflow_datasets/datasets/universal_dependencies/universal_dependencies_dataset_builder_test.py"
    "tensorflow_datasets/datasets/xtreme_pos/xtreme_pos_dataset_builder_test.py"

    # Requires `gcld3` and `pretty_midi` which are not packaged in `nixpkgs`.
    "tensorflow_datasets/core/lazy_imports_lib_test.py"

    # AttributeError: 'NoneType' object has no attribute 'Table'
    "tensorflow_datasets/core/dataset_builder_beam_test.py"
    "tensorflow_datasets/core/dataset_builders/adhoc_builder_test.py"
    "tensorflow_datasets/core/split_builder_test.py"
    "tensorflow_datasets/core/writer_test.py"

    # Requires `tensorflow_io` which is not packaged in `nixpkgs`.
    "tensorflow_datasets/core/features/audio_feature_test.py"
    "tensorflow_datasets/image/lsun_test.py"

    # Fails with `TypeError: Constant constructor takes either 0 or 2 positional arguments`
    # deep in TF AutoGraph. Doesn't reproduce in Docker with Ubuntu 22.04 => might be related
    # to the differences in some of the dependencies?
    "tensorflow_datasets/rl_unplugged/rlu_atari/rlu_atari_test.py"

    # Fails with `ValueError: setting an array element with a sequence`
    "tensorflow_datasets/core/dataset_utils_test.py"
    "tensorflow_datasets/core/features/sequence_feature_test.py"

    # Requires `tensorflow_docs` which is not packaged in `nixpkgs` and the test is for documentation anyway.
    "tensorflow_datasets/scripts/documentation/build_api_docs_test.py"

    # Not a test, should not be executed.
    "tensorflow_datasets/testing/test_utils.py"

    # Require `gcld3` and `nltk.punkt` which are not packaged in `nixpkgs`.
    "tensorflow_datasets/text/c4_test.py"
    "tensorflow_datasets/text/c4_utils_test.py"
  ];

  meta = {
    description = "Library of datasets ready to use with TensorFlow";
    homepage = "https://www.tensorflow.org/datasets/overview";
    changelog = "https://github.com/tensorflow/datasets/releases/tag/v${version}";
    license = lib.licenses.asl20;
    maintainers = with lib.maintainers; [ ndl ];
  };
}