{
  lib,
  buildPythonPackage,
  fetchFromGitHub,

  # build-system
  setuptools,

  # dependencies
  dill,
  filelock,
  fsspec,
  httpx,
  huggingface-hub,
  multiprocess,
  numpy,
  pandas,
  pyarrow,
  pyyaml,
  requests,
  tqdm,
  xxhash,
}:

# Python package definition for Hugging Face `datasets`, built from the
# upstream GitHub tag that matches `version`.
buildPythonPackage rec {
  pname = "datasets";
  version = "4.5.0";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "huggingface";
    repo = "datasets";
    tag = version;
    hash = "sha256-K8JqIbYz3ZfT1t1h5dRGCo9kBQp0E+kElqzaw2InaOI=";
  };

  build-system = [ setuptools ];

  # Runtime dependencies, plus everything listed in fsspec's `http` extra.
  dependencies = [
    dill
    filelock
    fsspec
    httpx
    huggingface-hub
    multiprocess
    numpy
    pandas
    pyarrow
    pyyaml
    requests
    tqdm
    xxhash
  ]
  ++ fsspec.optional-dependencies.http;

  # Version constraints from upstream's setup.py that are relaxed here;
  # each entry links to the line that declares the constraint.
  pythonRelaxDeps = [
    # https://github.com/huggingface/datasets/blob/a256b85cbc67aa3f0e75d32d6586afc507cf535b/setup.py#L117
    # Upstream's stated reason for the pin:
    # "pin until dill has official support for determinism"
    "dill"
    # https://github.com/huggingface/datasets/blob/4.5.0/setup.py#L127
    "multiprocess"
    # https://github.com/huggingface/datasets/blob/4.5.0/setup.py#L130
    "fsspec"
  ];

  # The test suite needs internet access throughout, so it is not run.
  doCheck = false;

  # Importing the module tries to create a cache directory, so
  # HF_MODULES_CACHE is pointed at $TMPDIR.
  postFixup = "export HF_MODULES_CACHE=$TMPDIR";

  pythonImportsCheck = [ "datasets" ];

  meta = {
    description = "Open-access datasets and evaluation metrics for natural language processing";
    mainProgram = "datasets-cli";
    homepage = "https://github.com/huggingface/datasets";
    changelog = "https://github.com/huggingface/datasets/releases/tag/${src.tag}";
    license = lib.licenses.asl20;
    maintainers = [ lib.maintainers.osbm ];
  };
}