# Package definition for lm-eval, built from the EleutherAI/lm-evaluation-harness
# GitHub repository at the tag matching `version`.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  setuptools-scm,
  accelerate,
  aiohttp,
  antlr4-python3-runtime,
  causal-conv1d,
  datasets,
  dill,
  evaluate,
  hf-transfer,
  immutabledict,
  jsonlines,
  langdetect,
  mamba-ssm,
  more-itertools,
  nltk,
  numexpr,
  numpy,
  optimum,
  pandas,
  peft,
  pybind11,
  pytablewriter,
  pytestCheckHook,
  requests,
  rouge-score,
  sacrebleu,
  scikit-learn,
  sentencepiece,
  sqlitedict,
  sympy,
  tenacity,
  tiktoken,
  torch,
  tqdm,
  tqdm-multiprocess,
  transformers,
  vllm,
  wandb,
  word2number,
  zstandard,
}:

buildPythonPackage rec {
  pname = "lm-eval";
  version = "0.4.8";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "EleutherAI";
    repo = "lm-evaluation-harness";
    # The source tag is derived from `version`, so bumping `version` also
    # requires refreshing `hash`.
    tag = "v${version}";
    hash = "sha256-F8oy6XTovqiU7FQyuubRsiblSdvfZg9RPIyzRw2GH18=";
  };

  build-system = [
    setuptools-scm
  ];

  dependencies = [
    accelerate
    datasets
    dill
    evaluate
    jsonlines
    more-itertools
    numexpr
    peft
    pybind11
    pytablewriter
    rouge-score
    sacrebleu
    scikit-learn
    sqlitedict
    torch
    tqdm-multiprocess
    transformers
    word2number
    zstandard
  ];

  # Optional feature groups; each attribute lists the extra packages that
  # group needs.
  optional-dependencies = {
    api = [
      requests
      aiohttp
      tenacity
      tqdm
      tiktoken
    ];
    hf_transfer = [ hf-transfer ];
    ifeval = [
      langdetect
      immutabledict
      nltk
    ];
    # Reuses optimum's own `neuronx` extra instead of repeating its contents.
    neuronx = [ optimum ] ++ optimum.optional-dependencies.neuronx;
    mamba = [
      mamba-ssm
      causal-conv1d
    ];
    math = [
      sympy
      antlr4-python3-runtime
    ];
    # Reuses optimum's own `openvino` extra instead of repeating its contents.
    optimum = [ optimum ] ++ optimum.optional-dependencies.openvino;
    sentencepiece = [ sentencepiece ];
    vllm = [ vllm ];
    wandb = [
      wandb
      pandas
      numpy
    ];
    # Still missing dependencies for the following:
    # deepsparse, gptq, ibm_watsonx_ai, multilingual, promptsource, sparseml,
    # zeno, gptqmodel, japanese_leaderboard; all = [...];
  };

  pythonImportsCheck = [ "lm_eval" ];

  # The test suite runs with the `api` optional dependencies available.
  nativeCheckInputs = [
    pytestCheckHook
  ] ++ optional-dependencies.api;

  # Point HOME at the build's temporary directory before the tests run.
  # NOTE(review): presumably something exercised by the tests writes under
  # $HOME — confirm which component needs this.
  preCheck = ''
    export HOME=$TMP
  '';

  disabledTests = [
    "test_deepsparse" # deepsparse is not available
    "test_model_tokenized_call_usage" # downloads a model
  ];

  disabledTestPaths = [
    # attempts to download models
    "tests/models/test_huggingface.py"
    "tests/test_evaluator.py"
    "tests/test_include_path.py"
    "tests/test_prompt.py"
    "tests/test_task_manager.py"
    "tests/test_tasks.py"

    # optimum-intel is not available
    "tests/models/test_openvino.py"
  ];

  meta = {
    changelog = "https://github.com/EleutherAI/lm-evaluation-harness/releases/tag/${src.tag}";
    # No leading article, per the nixpkgs meta.description guidelines.
    description = "Framework for few-shot evaluation of language models";
    homepage = "https://github.com/EleutherAI/lm-evaluation-harness";
    license = [ lib.licenses.mit ];
    maintainers = [ lib.maintainers.booxter ];
  };
}