at 25.11-pre 5.5 kB view raw
# Package expression for txtai, built with buildPythonPackage from the
# upstream GitHub repository. Extras that need libraries marked
# "(unpackaged)" below are left commented out.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,

  # build-system
  setuptools,

  # dependencies
  faiss,
  torch,
  transformers,
  huggingface-hub,
  msgpack,
  numpy,
  pyyaml,
  regex,

  # optional-dependencies
  # ann
  annoy,
  hnswlib,
  pgvector,
  sqlalchemy,
  sqlite-vec,
  # api
  aiohttp,
  fastapi,
  pillow,
  python-multipart,
  uvicorn,
  # cloud
  # apache-libcloud, (unpackaged)
  # console
  rich,
  # database
  duckdb,
  # graph
  # grand-cypher (unpackaged)
  # grand-graph (unpackaged)
  networkx,
  python-louvain,
  # model
  onnx,
  onnxruntime,
  # pipeline-audio
  # model2vec,
  sounddevice,
  soundfile,
  scipy,
  ttstokenizer,
  webrtcvad,
  # pipeline-data
  beautifulsoup4,
  nltk,
  pandas,
  tika,
  # pipeline-image
  imagehash,
  timm,
  # pipeline-llm
  litellm,
  # llama-cpp-python, (unpackaged)
  # pipeline-text
  fasttext,
  sentencepiece,
  # pipeline-train
  accelerate,
  bitsandbytes,
  onnxmltools,
  peft,
  skl2onnx,
  # vectors
  # pymagnitude-lite, (unpackaged)
  scikit-learn,
  sentence-transformers,
  skops,
  # workflow
  # apache-libcloud (unpackaged)
  croniter,
  openpyxl,
  requests,
  xmltodict,

  # tests
  httpx,
  pytestCheckHook,
}:
let
  version = "8.2.0";

  # One list per extras group. Groups are plain lists so the aggregate
  # groups further down can be built with `++`.
  ann = [
    annoy
    hnswlib
    pgvector
    sqlalchemy
    sqlite-vec
  ];
  api = [
    aiohttp
    fastapi
    pillow
    python-multipart
    uvicorn
  ];
  # cloud = [ apache-libcloud ];
  console = [ rich ];
  database = [
    duckdb
    pillow
    sqlalchemy
  ];
  graph = [
    # grand-cypher
    # grand-graph
    networkx
    python-louvain
    sqlalchemy
  ];
  model = [
    onnx
    onnxruntime
  ];
  pipeline-audio = [
    onnx
    onnxruntime
    scipy
    sounddevice
    soundfile
    ttstokenizer
    webrtcvad
  ];
  pipeline-data = [
    beautifulsoup4
    nltk
    pandas
    tika
  ];
  pipeline-image = [
    imagehash
    pillow
    timm
  ];
  pipeline-llm = [
    litellm
    # llama-cpp-python
  ];
  pipeline-text = [
    fasttext
    sentencepiece
  ];
  pipeline-train = [
    accelerate
    bitsandbytes
    onnx
    onnxmltools
    onnxruntime
    peft
    skl2onnx
  ];
  # Aggregate of every pipeline-* group.
  pipeline =
    pipeline-audio
    ++ pipeline-data
    ++ pipeline-image
    ++ pipeline-llm
    ++ pipeline-text
    ++ pipeline-train;
  scoring = [ sqlalchemy ];
  vectors = [
    fasttext
    litellm
    # llama-cpp-python
    # model2vec
    # pymagnitude-lite
    scikit-learn
    sentence-transformers
    skops
  ];
  workflow = [
    # apache-libcloud
    croniter
    openpyxl
    pandas
    pillow
    requests
    xmltodict
  ];
  similarity = ann ++ vectors;
  all =
    api
    ++ ann
    ++ console
    ++ database
    ++ graph
    ++ model
    ++ pipeline
    ++ scoring
    ++ similarity
    ++ workflow;

  # Attribute set handed to buildPythonPackage. `pipeline-data` and
  # `vectors` are exported here as well, so they can be selected on their
  # own instead of only through the `pipeline` / `similarity` aggregates.
  optional-dependencies = {
    inherit
      ann
      api
      console
      database
      graph
      model
      pipeline-audio
      pipeline-data
      pipeline-image
      pipeline-llm
      pipeline-text
      pipeline-train
      pipeline
      scoring
      similarity
      vectors
      workflow
      all
      ;
  };

  src = fetchFromGitHub {
    owner = "neuml";
    repo = "txtai";
    tag = "v${version}";
    hash = "sha256-fMzCYw9eqlpGI5FKoyYyxT17EhUFmFP9lrCn/LFC6ks=";
  };
in
buildPythonPackage {
  pname = "txtai";
  inherit version src;
  pyproject = true;

  build-system = [ setuptools ];

  pythonRemoveDeps = [
    # We call it faiss, not faiss-cpu.
    "faiss-cpu"
  ];

  dependencies = [
    faiss
    huggingface-hub
    msgpack
    numpy
    pyyaml
    regex
    torch
    transformers
  ];

  inherit optional-dependencies;

  # The Python imports check runs huggingface-hub which needs a writable directory.
  # `pythonImportsCheck` runs in the installPhase (before checkPhase).
  preInstall = ''
    export HF_HOME=$(mktemp -d)
  '';

  pythonImportsCheck = [ "txtai" ];

  nativeCheckInputs = [
    httpx
    msgpack
    pytestCheckHook
    python-multipart
    sqlalchemy
  ]
  ++ optional-dependencies.ann
  ++ optional-dependencies.api
  ++ optional-dependencies.similarity;

  # The deselected paths depend on the huggingface hub and should be run as a passthru test
  # disabledTestPaths won't work as the problem is with the classes containing the tests
  # (in other words, it fails on __init__)
  pytestFlagsArray = [
    "test/python/test*.py"
    "--deselect=test/python/testagent.py"
    "--deselect=test/python/testcloud.py"
    "--deselect=test/python/testconsole.py"
    "--deselect=test/python/testembeddings.py"
    "--deselect=test/python/testgraph.py"
    "--deselect=test/python/testapi/testembeddings.py"
    "--deselect=test/python/testapi/testpipelines.py"
    "--deselect=test/python/testapi/testworkflow.py"
    "--deselect=test/python/testdatabase/testclient.py"
    "--deselect=test/python/testdatabase/testduckdb.py"
    "--deselect=test/python/testdatabase/testencoder.py"
    "--deselect=test/python/testworkflow.py"
  ];

  disabledTests = [
    # Hardcoded paths
    "testInvalidTar"
    "testInvalidZip"
    # Downloads from Huggingface
    "testPipeline"
    # Not finding sqlite-vec despite being supplied
    "testSQLite"
    "testSQLiteCustom"
  ];

  meta = {
    description = "Semantic search and workflows powered by language models";
    changelog = "https://github.com/neuml/txtai/releases/tag/${src.tag}";
    homepage = "https://github.com/neuml/txtai";
    license = lib.licenses.asl20;
    maintainers = with lib.maintainers; [ happysalada ];
  };
}