# Package expression for the Hugging Face `datasets` Python library,
# built from the tagged GitHub sources with setuptools.
{
  lib,
  aiohttp,
  buildPythonPackage,
  dill,
  fetchFromGitHub,
  fetchpatch,
  fsspec,
  huggingface-hub,
  importlib-metadata,
  multiprocess,
  numpy,
  packaging,
  pandas,
  pyarrow,
  pythonOlder,
  requests,
  responses,
  tqdm,
  xxhash,
}:

buildPythonPackage rec {
  pname = "datasets";
  version = "2.12.0";
  format = "setuptools";

  # Not built for Python interpreters older than 3.7.
  disabled = pythonOlder "3.7";

  src = fetchFromGitHub {
    owner = "huggingface";
    repo = "datasets";
    rev = "refs/tags/${version}";
    hash = "sha256-o/LUzRmpM4tjiCh31KoQXzU1Z/p/91uamh7G4SGnxQM=";
  };

  # Drop the upper version bound that setup.py places on `responses`.
  postPatch = ''
    substituteInPlace setup.py \
      --replace "responses<0.19" "responses"
  '';

  propagatedBuildInputs =
    [
      aiohttp
      dill
      fsspec
      huggingface-hub
      multiprocess
      numpy
      packaging
      pandas
      pyarrow
      requests
      responses
      tqdm
      xxhash
    ]
    # importlib-metadata is only pulled in on Python older than 3.8.
    ++ lib.optional (pythonOlder "3.8") importlib-metadata;

  # The test suite needs internet access throughout, so it is not run.
  doCheck = false;

  # Importing the module tries to create a cache directory; point
  # HF_MODULES_CACHE at $TMPDIR so it has a writable location.
  # NOTE(review): presumably this is what lets the import check below succeed
  # inside the build sandbox -- confirm.
  postFixup = "export HF_MODULES_CACHE=$TMPDIR";

  pythonImportsCheck = [ "datasets" ];

  meta = {
    description = "Open-access datasets and evaluation metrics for natural language processing";
    homepage = "https://github.com/huggingface/datasets";
    changelog = "https://github.com/huggingface/datasets/releases/tag/${version}";
    license = lib.licenses.asl20;
    platforms = lib.platforms.unix;
    # No maintainers are currently listed.
    maintainers = [ ];
  };
}