1{
2 lib,
3 buildPythonPackage,
4 pythonOlder,
5 fetchFromGitHub,
6 pythonRelaxDepsHook,
7 # propagated build input
8 faiss,
9 torch,
10 transformers,
11 huggingface-hub,
12 numpy,
13 pyyaml,
14 regex,
15 # optional-dependencies
16 aiohttp,
17 fastapi,
18 uvicorn,
19 # TODO add apache-libcloud
20 # , apache-libcloud
21 rich,
22 duckdb,
23 pillow,
24 networkx,
25 python-louvain,
26 onnx,
27 onnxruntime,
28 soundfile,
29 scipy,
30 ttstokenizer,
31 beautifulsoup4,
32 nltk,
33 pandas,
34 tika,
35 imagehash,
36 timm,
37 fasttext,
38 sentencepiece,
39 accelerate,
40 onnxmltools,
41 annoy,
42 hnswlib,
43 # TODO add pymagnitude-lite
44 #, pymagnitude-lite
45 scikit-learn,
46 sentence-transformers,
47 croniter,
48 openpyxl,
49 requests,
50 xmltodict,
51 # native check inputs
52 unittestCheckHook,
53}:
let
  version = "7.1.0";

  # Optional dependency groups. Each list below is re-exported by name through
  # `optional-dependencies` at the end of this `let` block.
  # NOTE(review): the group names presumably mirror upstream's extras — confirm
  # against upstream when bumping the version.
  api = [
    aiohttp
    fastapi
    uvicorn
  ];
  # Disabled until apache-libcloud is added to the function arguments (see TODO there).
  # cloud = [ apache-libcloud ];
  console = [ rich ];

  database = [
    duckdb
    pillow
  ];

  graph = [
    networkx
    python-louvain
  ];

  model = [
    onnx
    onnxruntime
  ];

  # The `pipeline-*` groups are the building blocks of the aggregate `pipeline` group.
  pipeline-audio = [
    onnx
    onnxruntime
    soundfile
    scipy
    ttstokenizer
  ];
  pipeline-data = [
    beautifulsoup4
    nltk
    pandas
    tika
  ];
  pipeline-image = [
    imagehash
    pillow
    timm
  ];
  pipeline-text = [
    fasttext
    sentencepiece
  ];
  pipeline-train = [
    accelerate
    onnx
    onnxmltools
    onnxruntime
  ];
  # Concatenation of every `pipeline-*` group (duplicates are not removed).
  pipeline = pipeline-audio ++ pipeline-data ++ pipeline-image ++ pipeline-text ++ pipeline-train;

  similarity = [
    annoy
    fasttext
    hnswlib
    # pymagnitude-lite (not yet available, see TODO in the function arguments)
    scikit-learn
    sentence-transformers
  ];
  workflow = [
    # apache-libcloud (not yet available, see TODO in the function arguments)
    croniter
    openpyxl
    pandas
    pillow
    requests
    xmltodict
  ];
  # Concatenation of every group above (duplicates are not removed).
  all = api ++ console ++ database ++ graph ++ model ++ pipeline ++ similarity ++ workflow;

  # Attribute set handed to `passthru.optional-dependencies`.
  # Every group defined above must be listed here; `pipeline-data` was
  # previously defined but never exported, so it could not be selected by name.
  optional-dependencies = {
    inherit
      api
      console
      database
      graph
      model
      pipeline-audio
      pipeline-data
      pipeline-image
      pipeline-text
      pipeline-train
      pipeline
      similarity
      workflow
      all
      ;
  };
in
buildPythonPackage {
  # --- identity -------------------------------------------------------------
  pname = "txtai";
  inherit version;
  format = "setuptools";
  disabled = pythonOlder "3.8";

  # --- source ---------------------------------------------------------------
  # Fetched from the upstream GitHub repository at the tag matching `version`.
  src = fetchFromGitHub {
    owner = "neuml";
    repo = "txtai";
    rev = "refs/tags/v${version}";
    hash = "sha256-L+L2jRkCQKOgd1k3N4mft0Kt6kvCN81lgSQUjoon5rk=";
  };

  # --- build ----------------------------------------------------------------
  nativeBuildInputs = [ pythonRelaxDepsHook ];

  # Upstream asks for "faiss-cpu"; the package passed in here is named `faiss`,
  # so the upstream requirement name is dropped.
  pythonRemoveDeps = [ "faiss-cpu" ];

  # Runtime dependencies that are always required.
  propagatedBuildInputs = [
    faiss
    torch
    transformers
    huggingface-hub
    numpy
    pyyaml
    regex
  ];

  # Expose the extras groups computed in the surrounding `let` block.
  passthru = {
    inherit optional-dependencies;
  };

  # --- checks ---------------------------------------------------------------
  pythonImportsCheck = [ "txtai" ];

  # The test suite is switched off because some tests hang forever.
  # The check-related settings below are kept for when it is re-enabled.
  doCheck = false;

  # Point the transformers cache at a fresh temporary directory.
  preCheck = ''
    export TRANSFORMERS_CACHE=$(mktemp -d)
  '';

  nativeCheckInputs =
    [ unittestCheckHook ]
    ++ optional-dependencies.api
    ++ optional-dependencies.similarity;

  # Arguments for the unittest hook: start directory `test/python`, verbose output.
  unittestFlagsArray = [
    "-s"
    "test/python"
    "-v"
  ];

  # --- metadata -------------------------------------------------------------
  meta = {
    description = "Semantic search and workflows powered by language models";
    homepage = "https://github.com/neuml/txtai";
    changelog = "https://github.com/neuml/txtai/releases/tag/v${version}";
    license = lib.licenses.asl20;
    maintainers = [ lib.maintainers.happysalada ];
  };
}