1{
2 lib,
3 buildPythonPackage,
4 fetchFromGitHub,
5
6 # build-system
7 setuptools,
8
9 # dependencies
10 faiss,
11 torch,
12 transformers,
13 huggingface-hub,
14 numpy,
15 pyyaml,
16 regex,
17
18 # optional-dependencies
19 # ann
20 annoy,
21 hnswlib,
22 pgvector,
23 sqlalchemy,
24 sqlite-vec,
25 # api
26 aiohttp,
27 fastapi,
28 pillow,
29 python-multipart,
30 uvicorn,
31 # cloud
32 # apache-libcloud, (unpackaged)
33 # console
34 rich,
35 # database
36 duckdb,
37 # graph
38 # grand-cypher (unpackaged)
39 # grand-graph (unpackaged)
40 networkx,
41 python-louvain,
42 # model
43 onnx,
44 onnxruntime,
45 # pipeline-audio
46 # model2vec,
47 sounddevice,
48 soundfile,
49 scipy,
50 ttstokenizer,
51 webrtcvad,
52 # pipeline-data
53 beautifulsoup4,
54 nltk,
55 pandas,
56 tika,
57 # pipeline-image
58 imagehash,
59 timm,
60 # pipeline-llm
61 litellm,
62 # llama-cpp-python, (unpackaged)
63 # pipeline-text
64 fasttext,
65 sentencepiece,
66 # pipeline-train
67 accelerate,
68 bitsandbytes,
69 onnxmltools,
70 peft,
71 skl2onnx,
72 # vectors
73 # pymagnitude-lite, (unpackaged)
74 scikit-learn,
75 sentence-transformers,
76 skops,
77 # workflow
78 # apache-libcloud (unpackaged)
79 croniter,
80 openpyxl,
81 requests,
82 xmltodict,
83
84 # tests
85 httpx,
86 msgpack,
87 pytestCheckHook,
88}:
89let
90 version = "8.2.0";
91 ann = [
92 annoy
93 hnswlib
94 pgvector
95 sqlalchemy
96 sqlite-vec
97 ];
98 api = [
99 aiohttp
100 fastapi
101 pillow
102 python-multipart
103 uvicorn
104 ];
105 # cloud = [ apache-libcloud ];
106 console = [ rich ];
107 database = [
108 duckdb
109 pillow
110 sqlalchemy
111 ];
112 graph = [
113 # grand-cypher
114 # grand-graph
115 networkx
116 python-louvain
117 sqlalchemy
118 ];
119 model = [
120 onnx
121 onnxruntime
122 ];
123 pipeline-audio = [
124 onnx
125 onnxruntime
126 scipy
127 sounddevice
128 soundfile
129 ttstokenizer
130 webrtcvad
131 ];
132 pipeline-data = [
133 beautifulsoup4
134 nltk
135 pandas
136 tika
137 ];
138 pipeline-image = [
139 imagehash
140 pillow
141 timm
142 ];
143 pipeline-llm = [
144 litellm
145 # llama-cpp-python
146 ];
147 pipeline-text = [
148 fasttext
149 sentencepiece
150 ];
151 pipeline-train = [
152 accelerate
153 bitsandbytes
154 onnx
155 onnxmltools
156 onnxruntime
157 peft
158 skl2onnx
159 ];
160 pipeline =
161 pipeline-audio
162 ++ pipeline-data
163 ++ pipeline-image
164 ++ pipeline-llm
165 ++ pipeline-text
166 ++ pipeline-train;
167 scoring = [ sqlalchemy ];
168 vectors = [
169 fasttext
170 litellm
171 # llama-cpp-python
172 # model2vec
173 # pymagnitude-lite
174 scikit-learn
175 sentence-transformers
176 skops
177 ];
178 workflow = [
179 # apache-libcloud
180 croniter
181 openpyxl
182 pandas
183 pillow
184 requests
185 xmltodict
186 ];
187 similarity = ann ++ vectors;
188 all =
189 api
190 ++ ann
191 ++ console
192 ++ database
193 ++ graph
194 ++ model
195 ++ pipeline
196 ++ scoring
197 ++ similarity
198 ++ workflow;
199
200 optional-dependencies = {
201 inherit
202 ann
203 api
204 console
205 database
206 graph
207 model
208 pipeline-audio
209 pipeline-image
210 pipeline-llm
211 pipeline-text
212 pipeline-train
213 pipeline
214 scoring
215 similarity
216 workflow
217 all
218 ;
219 };
220
221 src = fetchFromGitHub {
222 owner = "neuml";
223 repo = "txtai";
224 tag = "v${version}";
225 hash = "sha256-fMzCYw9eqlpGI5FKoyYyxT17EhUFmFP9lrCn/LFC6ks=";
226 };
227in
228buildPythonPackage {
229 pname = "txtai";
230 inherit version src;
231 pyproject = true;
232
233 build-system = [ setuptools ];
234
235 pythonRemoveDeps = [
236 # We call it faiss, not faiss-cpu.
237 "faiss-cpu"
238 ];
239
240 dependencies = [
241 faiss
242 huggingface-hub
243 msgpack
244 numpy
245 pyyaml
246 regex
247 torch
248 transformers
249 ];
250
251 optional-dependencies = optional-dependencies;
252
253 # The Python imports check runs huggingface-hub which needs a writable directory.
254 # `pythonImportsCheck` runs in the installPhase (before checkPhase).
255 preInstall = ''
256 export HF_HOME=$(mktemp -d)
257 '';
258
259 pythonImportsCheck = [ "txtai" ];
260
261 nativeCheckInputs =
262 [
263 httpx
264 msgpack
265 pytestCheckHook
266 python-multipart
267 sqlalchemy
268 ]
269 ++ optional-dependencies.ann
270 ++ optional-dependencies.api
271 ++ optional-dependencies.similarity;
272
273 # The deselected paths depend on the huggingface hub and should be run as a passthru test
274 # disabledTestPaths won't work as the problem is with the classes containing the tests
275 # (in other words, it fails on __init__)
276 pytestFlagsArray = [
277 "test/python/test*.py"
278 "--deselect=test/python/testagent.py"
279 "--deselect=test/python/testcloud.py"
280 "--deselect=test/python/testconsole.py"
281 "--deselect=test/python/testembeddings.py"
282 "--deselect=test/python/testgraph.py"
283 "--deselect=test/python/testapi/testembeddings.py"
284 "--deselect=test/python/testapi/testpipelines.py"
285 "--deselect=test/python/testapi/testworkflow.py"
286 "--deselect=test/python/testdatabase/testclient.py"
287 "--deselect=test/python/testdatabase/testduckdb.py"
288 "--deselect=test/python/testdatabase/testencoder.py"
289 "--deselect=test/python/testworkflow.py"
290 ];
291
292 disabledTests = [
293 # Hardcoded paths
294 "testInvalidTar"
295 "testInvalidZip"
296 # Downloads from Huggingface
297 "testPipeline"
298 # Not finding sqlite-vec despite being supplied
299 "testSQLite"
300 "testSQLiteCustom"
301 ];
302
303 meta = {
304 description = "Semantic search and workflows powered by language models";
305 changelog = "https://github.com/neuml/txtai/releases/tag/${src.tag}";
306 homepage = "https://github.com/neuml/txtai";
307 license = lib.licenses.asl20;
308 maintainers = with lib.maintainers; [ happysalada ];
309 };
310}