# Package expression for Hugging Face `datasets`, built from the tagged
# GitHub source with the setuptools-based Python builder.
{
  lib,
  aiohttp,
  buildPythonPackage,
  dill,
  fetchFromGitHub,
  # Not referenced in this expression; kept so the argument set is unchanged.
  fetchpatch,
  fsspec,
  huggingface-hub,
  # Not referenced any more: it was only added for Python < 3.8, and the
  # package is disabled on those interpreters. Kept in the argument set so
  # existing `.override { importlib-metadata = ...; }` calls still evaluate.
  importlib-metadata,
  multiprocess,
  numpy,
  packaging,
  pandas,
  pyarrow,
  pythonOlder,
  requests,
  responses,
  tqdm,
  xxhash,
}:

buildPythonPackage rec {
  pname = "datasets";
  version = "2.19.0";
  format = "setuptools";

  # Python < 3.8 is rejected outright, so no dependency below needs to be
  # conditional on that version.
  disabled = pythonOlder "3.8";

  src = fetchFromGitHub {
    owner = "huggingface";
    repo = pname;
    rev = "refs/tags/${version}";
    hash = "sha256-m3x3/MCezA0WjYKBa2F12emMZdwLKi/9bFBf59A4qs8=";
  };

  # Comment out the `pyarrow_hotfix` import, which is a vulnerability fix for
  # pyarrow < 14.0.1; `pyarrow_hotfix` is not among this package's inputs.
  postPatch = ''
    substituteInPlace src/datasets/features/features.py \
      --replace "import pyarrow_hotfix" "#import pyarrow_hotfix"
  '';

  propagatedBuildInputs = [
    aiohttp
    dill
    fsspec
    huggingface-hub
    multiprocess
    numpy
    packaging
    pandas
    pyarrow
    requests
    responses
    tqdm
    xxhash
  ];

  # Tests require pervasive internet access
  doCheck = false;

  # Module import will attempt to create a cache directory, so point the
  # modules cache at the build's temporary directory.
  # NOTE(review): presumably this export is what lets the import check below
  # succeed in the sandbox -- confirm against the hook's phase ordering.
  postFixup = "export HF_MODULES_CACHE=$TMPDIR";

  pythonImportsCheck = [ "datasets" ];

  meta = {
    description = "Open-access datasets and evaluation metrics for natural language processing";
    mainProgram = "datasets-cli";
    homepage = "https://github.com/huggingface/datasets";
    changelog = "https://github.com/huggingface/datasets/releases/tag/${version}";
    license = lib.licenses.asl20;
    platforms = lib.platforms.unix;
    maintainers = [ ];
  };
}