Clone of https://github.com/NixOS/nixpkgs.git (to stress-test knotserver)
1{ 2 lib, 3 aiohttp, 4 buildPythonPackage, 5 dill, 6 fetchFromGitHub, 7 fsspec, 8 huggingface-hub, 9 importlib-metadata, 10 multiprocess, 11 numpy, 12 packaging, 13 pandas, 14 pyarrow, 15 pythonOlder, 16 requests, 17 responses, 18 tqdm, 19 xxhash, 20}: 21 22buildPythonPackage rec { 23 pname = "datasets"; 24 version = "2.20.0"; 25 format = "setuptools"; 26 27 disabled = pythonOlder "3.8"; 28 29 src = fetchFromGitHub { 30 owner = "huggingface"; 31 repo = pname; 32 rev = "refs/tags/${version}"; 33 hash = "sha256-9mB4RXJVkmaK+fLEmyZAdf64YKGoAhE3RzMoj4/8K98="; 34 }; 35 36 # remove pyarrow<14.0.1 vulnerability fix 37 postPatch = '' 38 substituteInPlace src/datasets/features/features.py \ 39 --replace "import pyarrow_hotfix" "#import pyarrow_hotfix" 40 ''; 41 42 propagatedBuildInputs = [ 43 aiohttp 44 dill 45 fsspec 46 huggingface-hub 47 multiprocess 48 numpy 49 packaging 50 pandas 51 pyarrow 52 requests 53 responses 54 tqdm 55 xxhash 56 ] ++ lib.optionals (pythonOlder "3.8") [ importlib-metadata ]; 57 58 # Tests require pervasive internet access 59 doCheck = false; 60 61 # Module import will attempt to create a cache directory 62 postFixup = "export HF_MODULES_CACHE=$TMPDIR"; 63 64 pythonImportsCheck = [ "datasets" ]; 65 66 meta = with lib; { 67 description = "Open-access datasets and evaluation metrics for natural language processing"; 68 mainProgram = "datasets-cli"; 69 homepage = "https://github.com/huggingface/datasets"; 70 changelog = "https://github.com/huggingface/datasets/releases/tag/${version}"; 71 license = licenses.asl20; 72 platforms = platforms.unix; 73 maintainers = [ ]; 74 }; 75}