# Package recipe for the Hugging Face `datasets` Python library, built from the
# upstream GitHub tag that matches `version`.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  dill,
  filelock,
  fsspec,
  huggingface-hub,
  multiprocess,
  numpy,
  pandas,
  pyarrow,
  requests,
  tqdm,
  xxhash,
}:

let
  pname = "datasets";
  version = "1.11.0";
in
buildPythonPackage {
  inherit pname version;

  # The source tree is fetched from GitHub at the tag named after the version.
  src = fetchFromGitHub {
    owner = "huggingface";
    repo = pname;
    rev = version;
    sha256 = "0pm14cp7xaagpf4j96v0ybi5gn3r9p0if3pc197ckwx6sw3lx29p";
  };

  # Runtime dependencies, propagated to anything that depends on this package.
  propagatedBuildInputs = [
    dill
    filelock
    fsspec
    huggingface-hub
    multiprocess
    numpy
    pandas
    pyarrow
    requests
    tqdm
    xxhash
  ];

  # The upstream test suite needs internet access throughout, so it is skipped.
  doCheck = false;

  # Importing the module tries to create a cache directory; point it at the
  # build's temporary directory. NOTE(review): presumably this export is what
  # lets the `pythonImportsCheck` below succeed during the build -- confirm.
  postFixup = "export HF_MODULES_CACHE=$TMPDIR";

  # Smoke test: the top-level module must be importable.
  pythonImportsCheck = [ "datasets" ];

  meta = {
    homepage = "https://github.com/huggingface/datasets";
    description = "Fast, efficient, open-access datasets and evaluation metrics for natural language processing";
    changelog = "https://github.com/huggingface/datasets/releases/tag/${version}";
    license = lib.licenses.asl20;
    platforms = lib.platforms.unix;
    # No maintainers are currently listed.
    maintainers = [ ];
  };
}