# Inputs of the txtai package expression, grouped by the role each one plays.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,

  # build-system
  setuptools,

  # dependencies
  faiss,
  huggingface-hub,
  # msgpack is a runtime dependency (it is listed in `dependencies`), not a
  # test-only input.
  msgpack,
  numpy,
  pyyaml,
  regex,
  torch,
  transformers,

  # optional-dependencies
  # agent
  mcpadapt,
  smolagents,
  # ann
  annoy,
  hnswlib,
  pgvector,
  sqlalchemy,
  sqlite-vec-c,
  # api
  aiohttp,
  fastapi,
  fastapi-mcp,
  httpx,
  pillow,
  python-multipart,
  uvicorn,
  # cloud
  # apache-libcloud, (unpackaged)
  fasteners,
  # console
  rich,
  # database
  duckdb,
  # graph
  # grand-cypher (unpackaged)
  # grand-graph (unpackaged)
  networkx,
  # model
  onnx,
  onnxruntime,
  # pipeline-audio
  # model2vec,
  sounddevice,
  soundfile,
  scipy,
  ttstokenizer,
  webrtcvad,
  # pipeline-data
  beautifulsoup4,
  nltk,
  pandas,
  tika,
  # pipeline-image
  imagehash,
  timm,
  # pipeline-llm
  litellm,
  # llama-cpp-python, (unpackaged)
  # pipeline-text
  gliner,
  sentencepiece,
  staticvectors,
  # pipeline-train
  accelerate,
  bitsandbytes,
  onnxmltools,
  peft,
  skl2onnx,
  # vectors
  fasttext,
  # pymagnitude-lite, (unpackaged)
  scikit-learn,
  sentence-transformers,
  skops,
  # workflow
  # apache-libcloud (unpackaged)
  croniter,
  openpyxl,
  requests,
  xmltodict,

  # tests
  pytestCheckHook,
}:
let
  version = "9.0.1";

  # One list per extra. Entries that are commented out are packages this
  # expression does not take as inputs; they are kept as placeholders so each
  # group still shows its full intended contents.
  agent = [
    mcpadapt
    smolagents
  ];
  ann = [
    annoy
    hnswlib
    pgvector
    sqlalchemy
    sqlite-vec-c
  ];
  api = [
    aiohttp
    fastapi
    fastapi-mcp
    httpx
    pillow
    python-multipart
    uvicorn
  ];
  cloud = [
    # apache-libcloud
    fasteners
  ];
  console = [ rich ];
  database = [
    duckdb
    pillow
    sqlalchemy
  ];
  graph = [
    # grand-cypher
    # grand-graph
    networkx
    sqlalchemy
  ];
  model = [
    onnx
    onnxruntime
  ];
  pipeline-audio = [
    onnx
    onnxruntime
    scipy
    sounddevice
    soundfile
    ttstokenizer
    webrtcvad
  ];
  pipeline-data = [
    beautifulsoup4
    nltk
    pandas
    tika
  ];
  pipeline-image = [
    imagehash
    pillow
    timm
  ];
  pipeline-llm = [
    litellm
    # llama-cpp-python
  ];
  pipeline-text = [
    gliner
    sentencepiece
    staticvectors
  ];
  pipeline-train = [
    accelerate
    bitsandbytes
    onnx
    onnxmltools
    onnxruntime
    peft
    skl2onnx
  ];
  # Union of every pipeline-* group.
  pipeline =
    pipeline-audio
    ++ pipeline-data
    ++ pipeline-image
    ++ pipeline-llm
    ++ pipeline-text
    ++ pipeline-train;
  scoring = [ sqlalchemy ];
  vectors = [
    fasttext
    litellm
    # llama-cpp-python
    # model2vec
    # pymagnitude-lite
    scikit-learn
    sentence-transformers
    skops
  ];
  workflow = [
    # apache-libcloud
    croniter
    openpyxl
    pandas
    pillow
    requests
    xmltodict
  ];
  # Similarity search is simply ann plus vectors.
  similarity = ann ++ vectors;
  # Every group defined above. `ann` and `vectors` are covered by
  # `similarity`, so they are not listed a second time; `cloud` is included
  # like any other group.
  all =
    agent
    ++ api
    ++ cloud
    ++ console
    ++ database
    ++ graph
    ++ model
    ++ pipeline
    ++ scoring
    ++ similarity
    ++ workflow;

  # Attribute set exposed by the package; `vectors` is exported next to `ann`
  # so both halves of `similarity` can be selected individually.
  optional-dependencies = {
    inherit
      agent
      ann
      api
      cloud
      console
      database
      graph
      model
      pipeline-audio
      pipeline-data
      pipeline-image
      pipeline-llm
      pipeline-text
      pipeline-train
      pipeline
      scoring
      similarity
      vectors
      workflow
      all
      ;
  };

  # Upstream sources, pinned to the release tag derived from `version`.
  src = fetchFromGitHub {
    owner = "neuml";
    repo = "txtai";
    tag = "v${version}";
    hash = "sha256-ciQDKpqTdgYe4oIgd2uxY7491SMr9Snha9XyTpxgXyY=";
  };
in
# The txtai derivation: a pyproject build using setuptools.
buildPythonPackage {
  pname = "txtai";
  inherit version src;
  pyproject = true;

  build-system = [ setuptools ];

  pythonRemoveDeps = [
    # We call it faiss, not faiss-cpu.
    "faiss-cpu"
  ];

  dependencies = [
    faiss
    huggingface-hub
    msgpack
    numpy
    pyyaml
    regex
    torch
    transformers
  ];

  inherit optional-dependencies;

  # Importing txtai ends up in huggingface-hub, which needs a writable
  # directory. Export HF_HOME before installation so it is already in place
  # by the time the imports check runs.
  preInstall = ''
    export HF_HOME=$(mktemp -d)
  '';

  pythonImportsCheck = [ "txtai" ];

  # Only inputs not already provided elsewhere are listed explicitly:
  # msgpack comes from `dependencies`, httpx and python-multipart from the
  # `api` extra, and sqlalchemy (with the rest of `ann`) from `similarity`.
  nativeCheckInputs = [
    pytestCheckHook
    timm
  ]
  ++ optional-dependencies.agent
  ++ optional-dependencies.api
  ++ optional-dependencies.similarity;

  # Test path selection; replaces the deprecated `pytestFlagsArray`.
  enabledTestPaths = [
    "test/python/*"
  ];

  disabledTests = [
    # Hardcoded paths
    "testInvalidTar"
    "testInvalidZip"
    # Downloads from Huggingface
    "TestAgent"
    "TestCloud"
    "TestConsole"
    "TestEmbeddings"
    "TestGraph"
    "TestWorkflow"
    "testPipeline"
    "testVectors"
    # Not finding sqlite-vec despite being supplied
    "testSQLite"
    "testSQLiteCustom"
  ];

  meta = {
    description = "Semantic search and workflows powered by language models";
    changelog = "https://github.com/neuml/txtai/releases/tag/${src.tag}";
    homepage = "https://github.com/neuml/txtai";
    license = lib.licenses.asl20;
    maintainers = with lib.maintainers; [ happysalada ];
  };
}