# Snapshot of the file as viewed on the master branch (raw view, 5.0 kB).
# Package expression for txtai, built from the neuml/txtai GitHub repository
# with buildPythonPackage (pyproject + setuptools).
#
# The upstream extras are mirrored below as plain lists so they can be both
# exposed through `optional-dependencies` and reused for the test inputs.
# Entries that are commented out are extras members that are not packaged.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,

  # build-system
  setuptools,

  # dependencies
  faiss,
  torch,
  transformers,
  huggingface-hub,
  msgpack,
  numpy,
  pyyaml,
  regex,

  # optional-dependencies
  # agent
  mcpadapt,
  smolagents,
  # ann
  annoy,
  hnswlib,
  pgvector,
  sqlalchemy,
  sqlite-vec-c,
  # api
  aiohttp,
  fastapi,
  fastapi-mcp,
  httpx,
  pillow,
  python-multipart,
  uvicorn,
  # cloud
  # apache-libcloud, (unpackaged)
  fasteners,
  # console
  rich,
  # database
  duckdb,
  # graph
  # grand-cypher (unpackaged)
  # grand-graph (unpackaged)
  networkx,
  # model
  onnx,
  onnxruntime,
  # pipeline-audio
  # model2vec,
  sounddevice,
  soundfile,
  scipy,
  ttstokenizer,
  webrtcvad,
  # pipeline-data
  beautifulsoup4,
  nltk,
  pandas,
  tika,
  # pipeline-image
  imagehash,
  timm,
  # pipeline-llm
  litellm,
  # llama-cpp-python, (unpackaged)
  # pipeline-text
  gliner,
  sentencepiece,
  staticvectors,
  # pipeline-train
  accelerate,
  bitsandbytes,
  onnxmltools,
  peft,
  skl2onnx,
  # vectors
  fasttext,
  # pymagnitude-lite, (unpackaged)
  scikit-learn,
  sentence-transformers,
  skops,
  # workflow
  # apache-libcloud (unpackaged)
  croniter,
  openpyxl,
  requests,
  xmltodict,

  # tests
  pytestCheckHook,
}:
let
  version = "9.0.1";

  # One list per extra; composite extras are built by concatenation below.
  agent = [
    mcpadapt
    smolagents
  ];
  ann = [
    annoy
    hnswlib
    pgvector
    sqlalchemy
    sqlite-vec-c
  ];
  api = [
    aiohttp
    fastapi
    fastapi-mcp
    httpx
    pillow
    python-multipart
    uvicorn
  ];
  cloud = [
    # apache-libcloud
    fasteners
  ];
  console = [ rich ];
  database = [
    duckdb
    pillow
    sqlalchemy
  ];
  graph = [
    # grand-cypher
    # grand-graph
    networkx
    sqlalchemy
  ];
  model = [
    onnx
    onnxruntime
  ];
  pipeline-audio = [
    onnx
    onnxruntime
    scipy
    sounddevice
    soundfile
    ttstokenizer
    webrtcvad
  ];
  pipeline-data = [
    beautifulsoup4
    nltk
    pandas
    tika
  ];
  pipeline-image = [
    imagehash
    pillow
    timm
  ];
  pipeline-llm = [
    litellm
    # llama-cpp-python
  ];
  pipeline-text = [
    gliner
    sentencepiece
    staticvectors
  ];
  pipeline-train = [
    accelerate
    bitsandbytes
    onnx
    onnxmltools
    onnxruntime
    peft
    skl2onnx
  ];
  pipeline =
    pipeline-audio
    ++ pipeline-data
    ++ pipeline-image
    ++ pipeline-llm
    ++ pipeline-text
    ++ pipeline-train;
  scoring = [ sqlalchemy ];
  vectors = [
    fasttext
    litellm
    # llama-cpp-python
    # model2vec
    # pymagnitude-lite
    scikit-learn
    sentence-transformers
    skops
  ];
  workflow = [
    # apache-libcloud
    croniter
    openpyxl
    pandas
    pillow
    requests
    xmltodict
  ];
  similarity = ann ++ vectors;
  # Note: `cloud` is deliberately not part of this concatenation.
  all =
    agent
    ++ api
    ++ ann
    ++ console
    ++ database
    ++ graph
    ++ model
    ++ pipeline
    ++ scoring
    ++ similarity
    ++ workflow;

  optional-dependencies = {
    inherit
      agent
      ann
      api
      cloud
      console
      database
      graph
      model
      pipeline-audio
      pipeline-data
      pipeline-image
      pipeline-llm
      pipeline-text
      pipeline-train
      pipeline
      scoring
      similarity
      workflow
      all
      ;
  };

  src = fetchFromGitHub {
    owner = "neuml";
    repo = "txtai";
    tag = "v${version}";
    hash = "sha256-ciQDKpqTdgYe4oIgd2uxY7491SMr9Snha9XyTpxgXyY=";
  };
in
buildPythonPackage {
  pname = "txtai";
  inherit version src;
  pyproject = true;

  build-system = [ setuptools ];

  pythonRemoveDeps = [
    # We call it faiss, not faiss-cpu.
    "faiss-cpu"
  ];

  dependencies = [
    faiss
    huggingface-hub
    msgpack
    numpy
    pyyaml
    regex
    torch
    transformers
  ];

  inherit optional-dependencies;

  # The Python imports check runs huggingface-hub which needs a writable directory.
  # `pythonImportsCheck` runs in the installPhase (before checkPhase).
  preInstall = ''
    export HF_HOME=$(mktemp -d)
  '';

  pythonImportsCheck = [ "txtai" ];

  # httpx, python-multipart and sqlalchemy arrive through the `api` / `ann`
  # extras and msgpack through `dependencies`, so only the inputs that are not
  # otherwise pulled in are listed explicitly.
  nativeCheckInputs = [
    pytestCheckHook
    timm
  ]
  ++ optional-dependencies.agent
  ++ optional-dependencies.ann
  ++ optional-dependencies.api
  ++ optional-dependencies.similarity;

  # Test paths go through `enabledTestPaths` (glob-expanded by pytestCheckHook)
  # rather than the deprecated `pytestFlagsArray`.
  enabledTestPaths = [
    "test/python/*"
  ];

  disabledTests = [
    # Hardcoded paths
    "testInvalidTar"
    "testInvalidZip"
    # Downloads from Huggingface
    "TestAgent"
    "TestCloud"
    "TestConsole"
    "TestEmbeddings"
    "TestGraph"
    "TestWorkflow"
    "testPipeline"
    "testVectors"
    # Not finding sqlite-vec despite being supplied
    "testSQLite"
    "testSQLiteCustom"
  ];

  meta = {
    description = "Semantic search and workflows powered by language models";
    changelog = "https://github.com/neuml/txtai/releases/tag/${src.tag}";
    homepage = "https://github.com/neuml/txtai";
    license = lib.licenses.asl20;
    maintainers = with lib.maintainers; [ happysalada ];
  };
}