{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  setuptools-scm,
  accelerate,
  aiohttp,
  antlr4-python3-runtime,
  causal-conv1d,
  datasets,
  dill,
  evaluate,
  hf-transfer,
  immutabledict,
  jsonlines,
  langdetect,
  mamba-ssm,
  more-itertools,
  nltk,
  numexpr,
  numpy,
  optimum,
  pandas,
  peft,
  pybind11,
  pytablewriter,
  pytestCheckHook,
  requests,
  rouge-score,
  sacrebleu,
  scikit-learn,
  sentencepiece,
  sqlitedict,
  sympy,
  tenacity,
  tiktoken,
  torch,
  tqdm,
  tqdm-multiprocess,
  transformers,
  vllm,
  wandb,
  word2number,
  zstandard,
}:

buildPythonPackage rec {
  pname = "lm-eval";
  version = "0.4.8";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "EleutherAI";
    repo = "lm-evaluation-harness";
    tag = "v${version}";
    hash = "sha256-F8oy6XTovqiU7FQyuubRsiblSdvfZg9RPIyzRw2GH18=";
  };

  build-system = [
    setuptools-scm
  ];

  dependencies = [
    accelerate
    datasets
    dill
    evaluate
    jsonlines
    more-itertools
    numexpr
    peft
    pybind11
    pytablewriter
    rouge-score
    sacrebleu
    scikit-learn
    sqlitedict
    torch
    tqdm-multiprocess
    transformers
    word2number
    zstandard
  ];

  optional-dependencies = {
    api = [
      requests
      aiohttp
      tenacity
      tqdm
      tiktoken
    ];
    hf_transfer = [ hf-transfer ];
    ifeval = [
      langdetect
      immutabledict
      nltk
    ];
    neuronx = [ optimum ] ++ optimum.optional-dependencies.neuronx;
    mamba = [
      mamba-ssm
      causal-conv1d
    ];
    math = [
      sympy
      antlr4-python3-runtime
    ];
    optimum = [ optimum ] ++ optimum.optional-dependencies.openvino;
    sentencepiece = [ sentencepiece ];
    vllm = [ vllm ];
    wandb = [
      wandb
      pandas
      numpy
    ];
    # Still missing dependencies for the following:
    # deepsparse, gptq, ibm_watsonx_ai, multilingual, promptsource, sparseml,
    # zeno, gptqmodel, japanese_leaderboard; all = [...];
  };

  pythonImportsCheck = [ "lm_eval" ];

  nativeCheckInputs = [
    pytestCheckHook
  ] ++ optional-dependencies.api;

  # Some tests need a writable HOME for caches
  preCheck = ''
    export HOME=$TMP
  '';

  disabledTests = [
    "test_deepsparse" # deepsparse is not available
    "test_model_tokenized_call_usage" # downloads a model
  ];

  disabledTestPaths = [
    # attempts to download models
    "tests/models/test_huggingface.py"
    "tests/test_evaluator.py"
    "tests/test_include_path.py"
    "tests/test_prompt.py"
    "tests/test_task_manager.py"
    "tests/test_tasks.py"

    # optimum-intel is not available
    "tests/models/test_openvino.py"
  ];

  meta = {
    changelog = "https://github.com/EleutherAI/lm-evaluation-harness/releases/tag/${src.tag}";
    description = "Framework for few-shot evaluation of language models";
    homepage = "https://github.com/EleutherAI/lm-evaluation-harness";
    license = [ lib.licenses.mit ];
    maintainers = [ lib.maintainers.booxter ];
  };
}