# Python package expression for txtai (https://github.com/neuml/txtai).
#
# The function takes its build inputs as arguments, groups the optional
# dependencies into named extras in the `let` block, and hands everything to
# `buildPythonPackage`. Extras whose packages are not available are kept as
# comments so the mapping to the upstream extras stays visible.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,

  # build-system
  setuptools,

  # dependencies
  faiss,
  huggingface-hub,
  msgpack,
  numpy,
  pyyaml,
  regex,
  torch,
  transformers,

  # optional-dependencies
  # agent
  mcpadapt,
  smolagents,
  # ann
  annoy,
  hnswlib,
  pgvector,
  sqlalchemy,
  sqlite-vec,
  # api
  aiohttp,
  fastapi,
  fastapi-mcp,
  httpx,
  pillow,
  python-multipart,
  uvicorn,
  # cloud
  # apache-libcloud, (unpackaged)
  fasteners,
  # console
  rich,
  # database
  duckdb,
  # graph
  # grand-cypher (unpackaged)
  # grand-graph (unpackaged)
  networkx,
  # model
  onnx,
  onnxruntime,
  # pipeline-audio
  # model2vec,
  sounddevice,
  soundfile,
  scipy,
  ttstokenizer,
  webrtcvad,
  # pipeline-data
  beautifulsoup4,
  nltk,
  pandas,
  tika,
  # pipeline-image
  imagehash,
  timm,
  # pipeline-llm
  litellm,
  # llama-cpp-python, (unpackaged)
  # pipeline-text
  gliner,
  sentencepiece,
  staticvectors,
  # pipeline-train
  accelerate,
  bitsandbytes,
  onnxmltools,
  peft,
  skl2onnx,
  # vectors
  fasttext,
  # pymagnitude-lite, (unpackaged)
  scikit-learn,
  sentence-transformers,
  skops,
  # workflow
  # apache-libcloud (unpackaged)
  croniter,
  openpyxl,
  requests,
  xmltodict,

  # tests
  pytestCheckHook,
}:
let
  version = "8.5.0";

  # One list per extra. Commented-out entries are packages that are not
  # passed into this expression.
  agent = [
    mcpadapt
    smolagents
  ];
  ann = [
    annoy
    hnswlib
    pgvector
    sqlalchemy
    sqlite-vec
  ];
  api = [
    aiohttp
    fastapi
    fastapi-mcp
    httpx
    pillow
    python-multipart
    uvicorn
  ];
  cloud = [
    # apache-libcloud
    fasteners
  ];
  console = [ rich ];
  database = [
    duckdb
    pillow
    sqlalchemy
  ];
  graph = [
    # grand-cypher
    # grand-graph
    networkx
    sqlalchemy
  ];
  model = [
    onnx
    onnxruntime
  ];
  pipeline-audio = [
    onnx
    onnxruntime
    scipy
    sounddevice
    soundfile
    ttstokenizer
    webrtcvad
  ];
  pipeline-data = [
    beautifulsoup4
    nltk
    pandas
    tika
  ];
  pipeline-image = [
    imagehash
    pillow
    timm
  ];
  pipeline-llm = [
    litellm
    # llama-cpp-python
  ];
  pipeline-text = [
    gliner
    sentencepiece
    staticvectors
  ];
  pipeline-train = [
    accelerate
    bitsandbytes
    onnx
    onnxmltools
    onnxruntime
    peft
    skl2onnx
  ];
  # Aggregate of every pipeline-* extra.
  pipeline =
    pipeline-audio
    ++ pipeline-data
    ++ pipeline-image
    ++ pipeline-llm
    ++ pipeline-text
    ++ pipeline-train;
  scoring = [ sqlalchemy ];
  vectors = [
    fasttext
    litellm
    # llama-cpp-python
    # model2vec
    # pymagnitude-lite
    scikit-learn
    sentence-transformers
    skops
  ];
  workflow = [
    # apache-libcloud
    croniter
    openpyxl
    pandas
    pillow
    requests
    xmltodict
  ];
  similarity = ann ++ vectors;
  # NOTE(review): `cloud` is not part of `all` here; confirm against the
  # upstream definition of the `all` extra before changing it.
  all =
    agent
    ++ api
    ++ ann
    ++ console
    ++ database
    ++ graph
    ++ model
    ++ pipeline
    ++ scoring
    ++ similarity
    ++ workflow;

  # Every extra defined above is exported, including `pipeline-data` and
  # `vectors`, so each one can be selected individually by consumers.
  optional-dependencies = {
    inherit
      agent
      ann
      api
      cloud
      console
      database
      graph
      model
      pipeline-audio
      pipeline-data
      pipeline-image
      pipeline-llm
      pipeline-text
      pipeline-train
      pipeline
      scoring
      similarity
      vectors
      workflow
      all
      ;
  };

  src = fetchFromGitHub {
    owner = "neuml";
    repo = "txtai";
    tag = "v${version}";
    hash = "sha256-kYjlA7pJ+xCC+tu0aaxziKaPo3hph5Ld8P/lVrip/eM=";
  };
in
buildPythonPackage {
  pname = "txtai";
  inherit version src;
  pyproject = true;

  build-system = [ setuptools ];

  pythonRemoveDeps = [
    # We call it faiss, not faiss-cpu.
    "faiss-cpu"
  ];

  dependencies = [
    faiss
    huggingface-hub
    msgpack
    numpy
    pyyaml
    regex
    torch
    transformers
  ];

  inherit optional-dependencies;

  # The Python imports check runs huggingface-hub which needs a writable directory.
  # HF_HOME is exported ahead of installation so that it is already set by the
  # time the import check and the test suite run.
  preInstall = ''
    export HF_HOME=$(mktemp -d)
  '';

  pythonImportsCheck = [ "txtai" ];

  nativeCheckInputs =
    [
      httpx
      msgpack
      pytestCheckHook
      python-multipart
      timm
      sqlalchemy
    ]
    ++ optional-dependencies.agent
    ++ optional-dependencies.ann
    ++ optional-dependencies.api
    ++ optional-dependencies.similarity;

  # The deselected paths depend on the huggingface hub and should be run as a passthru test
  # disabledTestPaths won't work as the problem is with the classes containing the tests
  # (in other words, it fails on __init__)
  pytestFlagsArray = [
    "test/python/test*.py"
    "--deselect=test/python/testagent.py"
    "--deselect=test/python/testcloud.py"
    "--deselect=test/python/testconsole.py"
    "--deselect=test/python/testembeddings.py"
    "--deselect=test/python/testgraph.py"
    "--deselect=test/python/testapi/testapiembeddings.py"
    "--deselect=test/python/testapi/testapipipelines.py"
    "--deselect=test/python/testapi/testapiworkflow.py"
    "--deselect=test/python/testdatabase/testclient.py"
    "--deselect=test/python/testdatabase/testduckdb.py"
    "--deselect=test/python/testdatabase/testencoder.py"
    "--deselect=test/python/testworkflow.py"
  ];

  disabledTests = [
    # Hardcoded paths
    "testInvalidTar"
    "testInvalidZip"
    # Downloads from Huggingface
    "testPipeline"
    "testVectors"
    # Not finding sqlite-vec despite being supplied
    "testSQLite"
    "testSQLiteCustom"
  ];

  meta = {
    description = "Semantic search and workflows powered by language models";
    changelog = "https://github.com/neuml/txtai/releases/tag/${src.tag}";
    homepage = "https://github.com/neuml/txtai";
    license = lib.licenses.asl20;
    maintainers = with lib.maintainers; [ happysalada ];
  };
}