# Package expression for the Hugging Face `datasets` Python library.
#
# Every argument is supplied by the caller: `lib` provides the license,
# platform and maintainer sets used in `meta`, the two builder functions
# produce the source and the package, and the remaining arguments are the
# Python libraries listed as propagated dependencies.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  dill,
  filelock,
  fsspec,
  huggingface-hub,
  multiprocess,
  numpy,
  pandas,
  pyarrow,
  requests,
  tqdm,
  xxhash,
}:

let
  # Bound once here so that `src` and `meta.changelog` can refer to them
  # without making the whole attribute set recursive.
  pname = "datasets";
  version = "1.4.1";
in
buildPythonPackage {
  inherit pname version;

  # Fetched from GitHub at the revision named exactly like the version.
  src = fetchFromGitHub {
    owner = "huggingface";
    repo = pname;
    rev = version;
    hash = "sha256-is8TS84varARWyfeDTbQH0pcYFTk0PcEyK183emB4GE=";
  };

  # Rewrite two requirement strings in setup.py: drop the upper bound on
  # tqdm and turn the exact huggingface_hub pin into a lower bound.
  postPatch = ''
    substituteInPlace setup.py \
      --replace '"tqdm>=4.27,<4.50.0"' '"tqdm>=4.27"' \
      --replace "huggingface_hub==0.0.2" "huggingface_hub>=0.0.2"
  '';

  # Order is kept as-is; list order is part of the resulting derivation.
  propagatedBuildInputs = [
    dill
    filelock
    fsspec
    huggingface-hub
    multiprocess
    numpy
    pandas
    pyarrow
    requests
    tqdm
    xxhash
  ];

  # The test suite needs pervasive internet access, so it is not run.
  doCheck = false;

  # Importing the module tries to create a cache directory, so
  # HF_MODULES_CACHE is pointed at $TMPDIR for the import check below.
  postFixup = "export HF_MODULES_CACHE=$TMPDIR";
  pythonImportsCheck = [ "datasets" ];

  meta = {
    description = "Fast, efficient, open-access datasets and evaluation metrics for natural language processing";
    homepage = "https://github.com/huggingface/datasets";
    changelog = "https://github.com/huggingface/datasets/releases/tag/${version}";
    license = lib.licenses.asl20;
    platforms = lib.platforms.unix;
    maintainers = [ lib.maintainers.danieldk ];
  };
}