# Package definition for Hugging Face `datasets`, built from the tagged
# GitHub source with buildPythonPackage (setuptools format).
{
  lib,
  aiohttp,
  buildPythonPackage,
  dill,
  fetchFromGitHub,
  fetchpatch,
  fsspec,
  huggingface-hub,
  importlib-metadata,
  multiprocess,
  numpy,
  packaging,
  pandas,
  pyarrow,
  pythonOlder,
  requests,
  responses,
  tqdm,
  xxhash,
}:

buildPythonPackage rec {
  pname = "datasets";
  version = "2.6.1";
  format = "setuptools";

  # Not built for interpreters older than Python 3.7.
  disabled = pythonOlder "3.7";

  src = fetchFromGitHub {
    owner = "huggingface";
    repo = "datasets";
    rev = "refs/tags/${version}";
    hash = "sha256-5j8HT/DzHH8xssv97g/9kpSgtpaY6daWOGwjasD1psg=";
  };

  # Backport of upstream PR #5166 (dill compatibility).
  # NOTE(review): the previous comment said "dill<3.7"; presumably this
  # means dill<0.3.7 -- confirm against the patch contents.
  patches = [
    (fetchpatch {
      url = "https://github.com/huggingface/datasets/pull/5166.patch";
      hash = "sha256-QigpXKHi2B60M/iIWSqvBU9hW5vBu6IHGML22aCMevo=";
    })
  ];

  # Drop the upper version bound on `responses` in setup.py.
  postPatch = ''
    substituteInPlace setup.py \
      --replace "responses<0.19" "responses"
  '';

  # importlib-metadata is only added for Python < 3.8.
  propagatedBuildInputs =
    [
      aiohttp
      dill
      fsspec
      huggingface-hub
      multiprocess
      numpy
      packaging
      pandas
      pyarrow
      requests
      responses
      tqdm
      xxhash
    ]
    ++ lib.optional (pythonOlder "3.8") importlib-metadata;

  # The test suite needs pervasive internet access, so it is not run.
  doCheck = false;

  # Importing the module attempts to create a cache directory; point that
  # cache at $TMPDIR (presumably so the import check below can succeed in
  # the build sandbox).
  postFixup = "export HF_MODULES_CACHE=$TMPDIR";

  pythonImportsCheck = [ "datasets" ];

  meta = {
    description = "Open-access datasets and evaluation metrics for natural language processing";
    homepage = "https://github.com/huggingface/datasets";
    changelog = "https://github.com/huggingface/datasets/releases/tag/${version}";
    license = lib.licenses.asl20;
    platforms = lib.platforms.unix;
    maintainers = [ ];
  };
}