# Package definition for farm-haystack (deepset-ai/haystack), built from the
# GitHub tag with hatchling. The arguments below are the build helpers, the
# core runtime dependencies, and every package referenced by one of the
# optional-dependency groups. Arguments that are commented out correspond to
# entries that are likewise commented out in `passthru.optional-dependencies`.
{ lib
, buildPythonPackage
, fetchFromGitHub
, pythonRelaxDepsHook
, hatchling
, boilerpy3
, events
, httpx
, jsonschema
, lazy-imports
, more-itertools
, networkx
, pandas
, pillow
, platformdirs
, posthog
, prompthub-py
, pydantic
, quantulum3
, rank-bm25
, requests
, requests-cache
, scikit-learn
, sseclient-py
, tenacity
, tiktoken
, tqdm
, transformers
, openai-whisper
, boto3
, botocore
# , beir
, selenium
, coverage
, dulwich
# , jupytercontrib
, mkdocs
, mypy
, pre-commit
, psutil
# , pydoc-markdown
, pylint
, pytest
, pytest-asyncio
, pytest-cov
# , pytest-custom-exit-code
, python-multipart
, reno
, responses
, toml
, tox
, watchdog
, elastic-transport
, elasticsearch
# , azure-ai-formrecognizer
, beautifulsoup4
, markdown
, python-docx
, python-frontmatter
, python-magic
, tika
, black
, huggingface-hub
, sentence-transformers
, mlflow
, rapidfuzz
, scipy
, seqeval
, pdf2image
, pytesseract
, faiss
# , faiss-gpu
, pinecone-client
, onnxruntime
, onnxruntime-tools
# , onnxruntime-gpu
, opensearch-py
, pymupdf
, langdetect
, nltk
, canals
, jinja2
, openai
, aiorwlock
, ray
, psycopg2
, sqlalchemy
, sqlalchemy-utils
, weaviate-client
}:

buildPythonPackage rec {
  pname = "farm-haystack";
  version = "1.22.0";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "deepset-ai";
    repo = "haystack";
    rev = "refs/tags/v${version}";
    hash = "sha256-0tRgZqySM9vPhs5lar89Gz/G7/YgRuFZosgTdKuGBH0=";
  };

  nativeBuildInputs = [
    hatchling
    pythonRelaxDepsHook
  ];

  pythonRemoveDeps = [
    # We call it faiss, not faiss-cpu.
    "faiss-cpu"
  ];

  # Core runtime dependencies, always propagated to consumers.
  propagatedBuildInputs = [
    boilerpy3
    events
    httpx
    jsonschema
    lazy-imports
    more-itertools
    networkx
    pandas
    pillow
    platformdirs
    posthog
    prompthub-py
    pydantic
    quantulum3
    rank-bm25
    requests
    requests-cache
    scikit-learn
    sseclient-py
    tenacity
    tiktoken
    tqdm
    transformers
  ];

  # Point HOME at a fresh temporary directory. Derivation attributes are
  # exported verbatim (no shell expansion), so the command substitution has
  # to happen inside a shell hook rather than in an `env.HOME` attribute.
  # NOTE(review): presumably required by the import check below -- confirm.
  preBuild = ''
    export HOME="$(mktemp -d)"
  '';

  # Extras, keyed by group name. Commented-out groups and entries have no
  # corresponding argument available in this file.
  passthru.optional-dependencies = {
    # all = [
    #   farm-haystack
    # ];
    # all-gpu = [
    #   farm-haystack
    # ];
    audio = [
      openai-whisper
    ];
    aws = [
      boto3
      botocore
    ];
    # beir = [
    #   beir
    # ];
    colab = [
      pillow
    ];
    crawler = [
      selenium
    ];
    dev = [
      coverage
      dulwich
      # jupytercontrib
      mkdocs
      mypy
      pre-commit
      psutil
      # pydoc-markdown
      pylint
      pytest
      pytest-asyncio
      pytest-cov
      # pytest-custom-exit-code
      python-multipart
      reno
      responses
      toml
      tox
      watchdog
    ];
    # NOTE(review): elasticsearch7 and elasticsearch8 both resolve to the same
    # `elasticsearch` package here; confirm that matches the versions upstream
    # expects for each group.
    elasticsearch7 = [
      elastic-transport
      elasticsearch
    ];
    elasticsearch8 = [
      elastic-transport
      elasticsearch
    ];
    file-conversion = [
      # azure-ai-formrecognizer
      beautifulsoup4
      markdown
      python-docx
      python-frontmatter
      python-magic
      # python-magic-bin
      tika
    ];
    formatting = [
      black
    ];
    inference = [
      huggingface-hub
      sentence-transformers
      transformers
    ];
    metrics = [
      mlflow
      rapidfuzz
      scipy
      seqeval
    ];
    ocr = [
      pdf2image
      pytesseract
    ];
    only-faiss = [
      faiss
    ];
    # only-faiss-gpu = [
    #   faiss-gpu
    # ];
    only-pinecone = [
      pinecone-client
    ];
    onnx = [
      onnxruntime
      onnxruntime-tools
    ];
    # onnx-gpu = [
    #   onnxruntime-gpu
    #   onnxruntime-tools
    # ];
    opensearch = [
      opensearch-py
    ];
    pdf = [
      pymupdf
    ];
    preprocessing = [
      langdetect
      nltk
    ];
    preview = [
      canals
      jinja2
      lazy-imports
      openai
      pandas
      rank-bm25
      requests
      tenacity
      tqdm
    ];
    ray = [
      aiorwlock
      ray
    ];
    sql = [
      psycopg2
      sqlalchemy
      sqlalchemy-utils
    ];
    weaviate = [
      weaviate-client
    ];
  };

  # the setup for test is intensive, hopefully can be done at some point
  doCheck = false;

  # With the test suite disabled, this import check is the only smoke test.
  pythonImportsCheck = [ "haystack" ];

  meta = {
    description = "LLM orchestration framework to build customizable, production-ready LLM applications";
    longDescription = ''
      LLM orchestration framework to build customizable, production-ready LLM applications. Connect components (models, vector DBs, file converters) to pipelines or agents that can interact with your data. With advanced retrieval methods, it's best suited for building RAG, question answering, semantic search or conversational agent chatbots
    '';
    # Use the bare tag name: `src.rev` is "refs/tags/v<version>", which does
    # not form a valid GitHub release URL.
    changelog = "https://github.com/deepset-ai/haystack/releases/tag/v${version}";
    homepage = "https://github.com/deepset-ai/haystack";
    license = lib.licenses.asl20;
    maintainers = with lib.maintainers; [ happysalada ];
  };
}