# Package expression for farm-haystack (deepset-ai/haystack, 1.x series).
#
# The argument set lists the build tooling, the core runtime dependencies and
# every package referenced by the optional-dependency groups below.
# Commented-out names are dependencies referenced by upstream extras that are
# not passed to this expression; they are kept as placeholders.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  pythonRelaxDepsHook,
  hatchling,
  boilerpy3,
  events,
  httpx,
  jsonschema,
  lazy-imports,
  more-itertools,
  networkx,
  pandas,
  pillow,
  platformdirs,
  posthog,
  prompthub-py,
  pydantic,
  quantulum3,
  rank-bm25,
  requests,
  requests-cache,
  scikit-learn,
  sseclient-py,
  tenacity,
  tiktoken,
  tqdm,
  transformers,
  openai-whisper,
  boto3,
  botocore,
  # , beir
  selenium,
  coverage,
  dulwich,
  # , jupytercontrib
  mkdocs,
  mypy,
  pre-commit,
  psutil,
  # , pydoc-markdown
  pylint,
  pytest,
  pytest-asyncio,
  pytest-cov,
  # , pytest-custom-exit-code
  python-multipart,
  reno,
  responses,
  toml,
  tox,
  watchdog,
  elastic-transport,
  elasticsearch,
  # , azure-ai-formrecognizer
  beautifulsoup4,
  markdown,
  python-docx,
  python-frontmatter,
  python-magic,
  tika,
  black,
  huggingface-hub,
  sentence-transformers,
  mlflow,
  rapidfuzz,
  scipy,
  seqeval,
  pdf2image,
  pytesseract,
  faiss,
  # , faiss-gpu
  pinecone-client,
  onnxruntime,
  onnxruntime-tools,
  # , onnxruntime-gpu
  opensearch-py,
  pymupdf,
  langdetect,
  nltk,
  canals,
  jinja2,
  openai,
  aiorwlock,
  ray,
  psycopg2,
  sqlalchemy,
  sqlalchemy-utils,
  weaviate-client,
}:

buildPythonPackage rec {
  pname = "farm-haystack";
  version = "1.25.0";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "deepset-ai";
    repo = "haystack";
    rev = "refs/tags/v${version}";
    hash = "sha256-lAXzWnHpOBVjgleFTYqlZ34hmZkcrPJ/h1kk4iVVvec=";
  };

  nativeBuildInputs = [
    hatchling
    pythonRelaxDepsHook
  ];

  pythonRemoveDeps = [
    # We call it faiss, not faiss-cpu.
    "faiss-cpu"
  ];

  propagatedBuildInputs = [
    boilerpy3
    events
    httpx
    jsonschema
    lazy-imports
    more-itertools
    networkx
    pandas
    pillow
    platformdirs
    posthog
    prompthub-py
    pydantic
    quantulum3
    rank-bm25
    requests
    requests-cache
    scikit-learn
    sseclient-py
    tenacity
    tiktoken
    tqdm
    transformers
  ];

  # Point HOME at a fresh temporary directory for the build and the phases
  # that follow it. This has to happen in a shell hook: a derivation
  # attribute such as `env.HOME = "$(mktemp -d)"` is handed to the builder
  # verbatim, so HOME would be the literal text `$(mktemp -d)` rather than
  # the path of a new directory.
  preBuild = ''
    export HOME=$(mktemp -d)
  '';

  # Dependency groups exposed to consumers of this package. Commented-out
  # groups and entries reference packages that are not arguments of this
  # expression.
  passthru.optional-dependencies = {
    # all = [
    #   farm-haystack
    # ];
    # all-gpu = [
    #   farm-haystack
    # ];
    audio = [ openai-whisper ];
    aws = [
      boto3
      botocore
    ];
    # beir = [
    #   beir
    # ];
    colab = [ pillow ];
    crawler = [ selenium ];
    dev = [
      coverage
      dulwich
      # jupytercontrib
      mkdocs
      mypy
      pre-commit
      psutil
      # pydoc-markdown
      pylint
      pytest
      pytest-asyncio
      pytest-cov
      # pytest-custom-exit-code
      python-multipart
      reno
      responses
      toml
      tox
      watchdog
    ];
    # NOTE(review): elasticsearch7 and elasticsearch8 resolve to the same
    # single `elasticsearch` package here; confirm whether version-specific
    # packages should be used instead.
    elasticsearch7 = [
      elastic-transport
      elasticsearch
    ];
    elasticsearch8 = [
      elastic-transport
      elasticsearch
    ];
    file-conversion = [
      # azure-ai-formrecognizer
      beautifulsoup4
      markdown
      python-docx
      python-frontmatter
      python-magic
      # python-magic-bin
      tika
    ];
    formatting = [ black ];
    inference = [
      huggingface-hub
      sentence-transformers
      transformers
    ];
    metrics = [
      mlflow
      rapidfuzz
      scipy
      seqeval
    ];
    ocr = [
      pdf2image
      pytesseract
    ];
    only-faiss = [ faiss ];
    # only-faiss-gpu = [
    #   faiss-gpu
    # ];
    only-pinecone = [ pinecone-client ];
    onnx = [
      onnxruntime
      onnxruntime-tools
    ];
    # onnx-gpu = [
    #   onnxruntime-gpu
    #   onnxruntime-tools
    # ];
    opensearch = [ opensearch-py ];
    pdf = [ pymupdf ];
    preprocessing = [
      langdetect
      nltk
    ];
    preview = [
      canals
      jinja2
      lazy-imports
      openai
      pandas
      rank-bm25
      requests
      tenacity
      tqdm
    ];
    ray = [
      aiorwlock
      ray
    ];
    sql = [
      psycopg2
      sqlalchemy
      sqlalchemy-utils
    ];
    weaviate = [ weaviate-client ];
  };

  # The test setup is intensive; hopefully it can be enabled at some point.
  doCheck = false;

  pythonImportsCheck = [ "haystack" ];

  meta = {
    description = "LLM orchestration framework to build customizable, production-ready LLM applications";
    longDescription = ''
      LLM orchestration framework to build customizable, production-ready LLM applications. Connect components (models, vector DBs, file converters) to pipelines or agents that can interact with your data. With advanced retrieval methods, it's best suited for building RAG, question answering, semantic search or conversational agent chatbots
    '';
    # Built from the version rather than `src.rev`: the rev is
    # "refs/tags/v<version>", which does not form a valid release-page URL.
    changelog = "https://github.com/deepset-ai/haystack/releases/tag/v${version}";
    homepage = "https://github.com/deepset-ai/haystack";
    license = lib.licenses.asl20;
    maintainers = with lib.maintainers; [ happysalada ];
    # Marked broken when the provided pydantic is version 2 or newer, see
    # https://github.com/deepset-ai/haystack/issues/5304
    broken = lib.versionAtLeast pydantic.version "2";
  };
}