Clone of https://github.com/NixOS/nixpkgs.git (to stress-test knotserver)
at flake-libs 331 lines 5.8 kB view raw
# Package definition for txtai, built with buildPythonPackage.
#
# The function arguments are grouped the same way as the extras lists in the
# `let` block below: build system, mandatory runtime dependencies, then one
# group per optional-dependency set, then test-only inputs. Entries that are
# commented out are kept as placeholders for dependencies that are not
# available as arguments here.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,

  # build-system
  setuptools,

  # dependencies
  faiss,
  torch,
  transformers,
  huggingface-hub,
  numpy,
  pyyaml,
  regex,

  # optional-dependencies
  # agent
  mcpadapt,
  smolagents,
  # ann
  annoy,
  hnswlib,
  pgvector,
  sqlalchemy,
  sqlite-vec,
  # api
  aiohttp,
  fastapi,
  fastapi-mcp,
  httpx,
  pillow,
  python-multipart,
  uvicorn,
  # cloud
  # apache-libcloud, (unpackaged)
  fasteners,
  # console
  rich,
  # database
  duckdb,
  # graph
  # grand-cypher (unpackaged)
  # grand-graph (unpackaged)
  networkx,
  # model
  onnx,
  onnxruntime,
  # pipeline-audio
  # model2vec,
  sounddevice,
  soundfile,
  scipy,
  ttstokenizer,
  webrtcvad,
  # pipeline-data
  beautifulsoup4,
  nltk,
  pandas,
  tika,
  # pipeline-image
  imagehash,
  timm,
  # pipeline-llm
  litellm,
  # llama-cpp-python, (unpackaged)
  # pipeline-text
  gliner,
  sentencepiece,
  staticvectors,
  # pipeline-train
  accelerate,
  bitsandbytes,
  onnxmltools,
  peft,
  skl2onnx,
  # vectors
  fasttext,
  # pymagnitude-lite, (unpackaged)
  scikit-learn,
  sentence-transformers,
  skops,
  # workflow
  # apache-libcloud (unpackaged)
  croniter,
  openpyxl,
  requests,
  xmltodict,

  # tests
  msgpack,
  pytestCheckHook,
}:
let
  version = "8.5.0";

  # One list per extras group. Some packages intentionally appear in more than
  # one group (e.g. pillow, sqlalchemy, onnx); the composite groups further
  # down are plain concatenations, so duplicates are carried through as-is.
  agent = [
    mcpadapt
    smolagents
  ];
  ann = [
    annoy
    hnswlib
    pgvector
    sqlalchemy
    sqlite-vec
  ];
  api = [
    aiohttp
    fastapi
    fastapi-mcp
    httpx
    pillow
    python-multipart
    uvicorn
  ];
  cloud = [
    # apache-libcloud
    fasteners
  ];
  console = [ rich ];
  database = [
    duckdb
    pillow
    sqlalchemy
  ];
  graph = [
    # grand-cypher
    # grand-graph
    networkx
    sqlalchemy
  ];
  model = [
    onnx
    onnxruntime
  ];
  pipeline-audio = [
    onnx
    onnxruntime
    scipy
    sounddevice
    soundfile
    ttstokenizer
    webrtcvad
  ];
  pipeline-data = [
    beautifulsoup4
    nltk
    pandas
    tika
  ];
  pipeline-image = [
    imagehash
    pillow
    timm
  ];
  pipeline-llm = [
    litellm
    # llama-cpp-python
  ];
  pipeline-text = [
    gliner
    sentencepiece
    staticvectors
  ];
  pipeline-train = [
    accelerate
    bitsandbytes
    onnx
    onnxmltools
    onnxruntime
    peft
    skl2onnx
  ];

  # Composite group: every pipeline-* list concatenated.
  pipeline =
    pipeline-audio
    ++ pipeline-data
    ++ pipeline-image
    ++ pipeline-llm
    ++ pipeline-text
    ++ pipeline-train;
  scoring = [ sqlalchemy ];
  vectors = [
    fasttext
    litellm
    # llama-cpp-python
    # model2vec
    # pymagnitude-lite
    scikit-learn
    sentence-transformers
    skops
  ];
  workflow = [
    # apache-libcloud
    croniter
    openpyxl
    pandas
    pillow
    requests
    xmltodict
  ];

  # Composite group: approximate-nearest-neighbour backends plus vector models.
  similarity = ann ++ vectors;

  # NOTE(review): `cloud` is not part of `all` here. Upstream's "all" extra
  # presumably includes it - confirm against txtai's setup.py before changing.
  all =
    agent
    ++ api
    ++ ann
    ++ console
    ++ database
    ++ graph
    ++ model
    ++ pipeline
    ++ scoring
    ++ similarity
    ++ workflow;

  # Every group defined above is exported. `pipeline-data` and `vectors` are
  # listed explicitly so they can be selected on their own, like the other
  # groups, instead of only being reachable through `pipeline` / `similarity`.
  optional-dependencies = {
    inherit
      agent
      ann
      api
      cloud
      console
      database
      graph
      model
      pipeline-audio
      pipeline-data
      pipeline-image
      pipeline-llm
      pipeline-text
      pipeline-train
      pipeline
      scoring
      similarity
      vectors
      workflow
      all
      ;
  };

  src = fetchFromGitHub {
    owner = "neuml";
    repo = "txtai";
    tag = "v${version}";
    hash = "sha256-kYjlA7pJ+xCC+tu0aaxziKaPo3hph5Ld8P/lVrip/eM=";
  };
in
buildPythonPackage {
  pname = "txtai";
  inherit version src;
  pyproject = true;

  build-system = [ setuptools ];

  pythonRemoveDeps = [
    # We call it faiss, not faiss-cpu.
    "faiss-cpu"
  ];

  dependencies = [
    faiss
    huggingface-hub
    msgpack
    numpy
    pyyaml
    regex
    torch
    transformers
  ];

  inherit optional-dependencies;

  # The Python imports check runs huggingface-hub which needs a writable directory.
  # `pythonImportsCheck` runs in the installPhase (before checkPhase).
  preInstall = ''
    export HF_HOME=$(mktemp -d)
  '';

  pythonImportsCheck = [ "txtai" ];

  # Only a subset of the extras is pulled in for the test run; the test files
  # that would need the remaining groups are deselected below.
  nativeCheckInputs =
    [
      httpx
      msgpack
      pytestCheckHook
      python-multipart
      timm
      sqlalchemy
    ]
    ++ optional-dependencies.agent
    ++ optional-dependencies.ann
    ++ optional-dependencies.api
    ++ optional-dependencies.similarity;

  # The deselected paths depend on the huggingface hub and should be run as a passthru test
  # disabledTestPaths won't work as the problem is with the classes containing the tests
  # (in other words, it fails on __init__)
  pytestFlagsArray = [
    "test/python/test*.py"
    "--deselect=test/python/testagent.py"
    "--deselect=test/python/testcloud.py"
    "--deselect=test/python/testconsole.py"
    "--deselect=test/python/testembeddings.py"
    "--deselect=test/python/testgraph.py"
    "--deselect=test/python/testapi/testapiembeddings.py"
    "--deselect=test/python/testapi/testapipipelines.py"
    "--deselect=test/python/testapi/testapiworkflow.py"
    "--deselect=test/python/testdatabase/testclient.py"
    "--deselect=test/python/testdatabase/testduckdb.py"
    "--deselect=test/python/testdatabase/testencoder.py"
    "--deselect=test/python/testworkflow.py"
  ];

  disabledTests = [
    # Hardcoded paths
    "testInvalidTar"
    "testInvalidZip"
    # Downloads from Huggingface
    "testPipeline"
    "testVectors"
    # Not finding sqlite-vec despite being supplied
    "testSQLite"
    "testSQLiteCustom"
  ];

  meta = {
    description = "Semantic search and workflows powered by language models";
    changelog = "https://github.com/neuml/txtai/releases/tag/${src.tag}";
    homepage = "https://github.com/neuml/txtai";
    license = lib.licenses.asl20;
    maintainers = with lib.maintainers; [ happysalada ];
  };
}