# Package recipe for Hugging Face `datasets`, built from the upstream GitHub
# tag with setuptools.
#
# Arguments are the nixpkgs Python package-set attributes this recipe is
# called with. `fetchpatch` and `importlib-metadata` are not referenced in the
# body; they stay in the argument set only so that existing `.override`
# call sites that pass them keep evaluating.
{ lib
, aiohttp
, buildPythonPackage
, dill
, fetchFromGitHub
, fetchpatch
, fsspec
, huggingface-hub
, importlib-metadata
, multiprocess
, numpy
, packaging
, pandas
, pyarrow
, pythonOlder
, requests
, responses
, tqdm
, xxhash
}:

buildPythonPackage rec {
  pname = "datasets";
  version = "2.14.5";
  format = "setuptools";

  # Interpreters older than 3.8 are rejected outright.
  disabled = pythonOlder "3.8";

  src = fetchFromGitHub {
    owner = "huggingface";
    repo = "datasets";
    rev = "refs/tags/${version}";
    hash = "sha256-oLB6laY/Si071mBKoWlZpd1fqr/wNtAnhRvBKLjeEuE=";
  };

  # Runtime dependencies.
  #
  # There used to be a trailing
  #   ++ lib.optionals (pythonOlder "3.8") [ importlib-metadata ]
  # here. Because `disabled` above already excludes every interpreter for
  # which that condition is true, the branch could never contribute anything
  # and has been dropped.
  propagatedBuildInputs = [
    aiohttp
    dill
    fsspec
    huggingface-hub
    multiprocess
    numpy
    packaging
    pandas
    pyarrow
    requests
    responses
    tqdm
    xxhash
  ];

  # Tests require pervasive internet access
  doCheck = false;

  # Importing the module attempts to create a cache directory; point
  # HF_MODULES_CACHE at the build's temporary directory so the import check
  # below has somewhere writable to put it.
  postFixup = "export HF_MODULES_CACHE=$TMPDIR";

  pythonImportsCheck = [
    "datasets"
  ];

  meta = {
    description = "Open-access datasets and evaluation metrics for natural language processing";
    homepage = "https://github.com/huggingface/datasets";
    changelog = "https://github.com/huggingface/datasets/releases/tag/${version}";
    license = lib.licenses.asl20;
    platforms = lib.platforms.unix;
    # Currently unmaintained.
    maintainers = [ ];
  };
}