# Package definition for farm-haystack (the deepset-ai/haystack 1.x line).
#
# The function takes every Python dependency as an argument. Arguments that
# are commented out with a leading `# ,` correspond to upstream extras whose
# packages are not wired in here (presumably because they are not packaged --
# TODO confirm before re-enabling).
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  pythonRelaxDepsHook,
  hatchling,
  boilerpy3,
  events,
  httpx,
  jsonschema,
  lazy-imports,
  more-itertools,
  networkx,
  pandas,
  pillow,
  platformdirs,
  posthog,
  prompthub-py,
  pydantic,
  quantulum3,
  rank-bm25,
  requests,
  requests-cache,
  scikit-learn,
  sseclient-py,
  tenacity,
  tiktoken,
  tqdm,
  transformers,
  openai-whisper,
  boto3,
  botocore,
  # , beir
  selenium,
  coverage,
  dulwich,
  # , jupytercontrib
  mkdocs,
  mypy,
  pre-commit,
  psutil,
  # , pydoc-markdown
  pylint,
  pytest,
  pytest-asyncio,
  pytest-cov,
  # , pytest-custom-exit-code
  python-multipart,
  reno,
  responses,
  toml,
  tox,
  watchdog,
  elastic-transport,
  elasticsearch,
  # , azure-ai-formrecognizer
  beautifulsoup4,
  markdown,
  python-docx,
  python-frontmatter,
  python-magic,
  tika,
  black,
  huggingface-hub,
  sentence-transformers,
  mlflow,
  rapidfuzz,
  scipy,
  seqeval,
  pdf2image,
  pytesseract,
  faiss,
  # , faiss-gpu
  pinecone-client,
  onnxruntime,
  onnxruntime-tools,
  # , onnxruntime-gpu
  opensearch-py,
  pymupdf,
  langdetect,
  nltk,
  canals,
  jinja2,
  openai,
  aiorwlock,
  ray,
  psycopg2,
  sqlalchemy,
  sqlalchemy-utils,
  weaviate-client,
}:

buildPythonPackage rec {
  pname = "farm-haystack";
  version = "1.25.0";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "deepset-ai";
    repo = "haystack";
    rev = "refs/tags/v${version}";
    hash = "sha256-lAXzWnHpOBVjgleFTYqlZ34hmZkcrPJ/h1kk4iVVvec=";
  };

  nativeBuildInputs = [
    hatchling
    pythonRelaxDepsHook
  ];

  pythonRemoveDeps = [
    # We call it faiss, not faiss-cpu.
    "faiss-cpu"
  ];

  propagatedBuildInputs = [
    boilerpy3
    events
    httpx
    jsonschema
    lazy-imports
    more-itertools
    networkx
    pandas
    pillow
    platformdirs
    posthog
    prompthub-py
    pydantic
    quantulum3
    rank-bm25
    requests
    requests-cache
    scikit-learn
    sseclient-py
    tenacity
    tiktoken
    tqdm
    transformers
  ];

  # Point HOME at a fresh, writable temporary directory. This has to be done
  # from a shell hook: a value assigned through `env.HOME` is handed to the
  # builder verbatim, so "$(mktemp -d)" would never be expanded there. The
  # export persists for the later phases, including the import check.
  preBuild = ''
    export HOME=$(mktemp -d)
  '';

  # Mirrors the upstream extras. Commented-out groups and entries are the
  # ones whose dependencies are not taken as arguments by this expression.
  passthru.optional-dependencies = {
    # all = [
    #   farm-haystack
    # ];
    # all-gpu = [
    #   farm-haystack
    # ];
    audio = [ openai-whisper ];
    aws = [
      boto3
      botocore
    ];
    # beir = [
    #   beir
    # ];
    colab = [ pillow ];
    crawler = [ selenium ];
    dev = [
      coverage
      dulwich
      # jupytercontrib
      mkdocs
      mypy
      pre-commit
      psutil
      # pydoc-markdown
      pylint
      pytest
      pytest-asyncio
      pytest-cov
      # pytest-custom-exit-code
      python-multipart
      reno
      responses
      toml
      tox
      watchdog
    ];
    # NOTE(review): elasticsearch7 and elasticsearch8 resolve to the same
    # two packages here, so no version distinction is expressed.
    elasticsearch7 = [
      elastic-transport
      elasticsearch
    ];
    elasticsearch8 = [
      elastic-transport
      elasticsearch
    ];
    file-conversion = [
      # azure-ai-formrecognizer
      beautifulsoup4
      markdown
      python-docx
      python-frontmatter
      python-magic
      # python-magic-bin
      tika
    ];
    formatting = [ black ];
    inference = [
      huggingface-hub
      sentence-transformers
      transformers
    ];
    metrics = [
      mlflow
      rapidfuzz
      scipy
      seqeval
    ];
    ocr = [
      pdf2image
      pytesseract
    ];
    only-faiss = [ faiss ];
    # only-faiss-gpu = [
    #   faiss-gpu
    # ];
    only-pinecone = [ pinecone-client ];
    onnx = [
      onnxruntime
      onnxruntime-tools
    ];
    # onnx-gpu = [
    #   onnxruntime-gpu
    #   onnxruntime-tools
    # ];
    opensearch = [ opensearch-py ];
    pdf = [ pymupdf ];
    preprocessing = [
      langdetect
      nltk
    ];
    preview = [
      canals
      jinja2
      lazy-imports
      openai
      pandas
      rank-bm25
      requests
      tenacity
      tqdm
    ];
    ray = [
      aiorwlock
      ray
    ];
    sql = [
      psycopg2
      sqlalchemy
      sqlalchemy-utils
    ];
    weaviate = [ weaviate-client ];
  };

  # the setup for test is intensive, hopefully can be done at some point
  doCheck = false;

  # With the test suite disabled, importing the top-level module is the only
  # sanity check performed on the built package.
  pythonImportsCheck = [ "haystack" ];

  meta = {
    description = "LLM orchestration framework to build customizable, production-ready LLM applications";
    longDescription = ''
      LLM orchestration framework to build customizable, production-ready LLM applications. Connect components (models, vector DBs, file converters) to pipelines or agents that can interact with your data. With advanced retrieval methods, it's best suited for building RAG, question answering, semantic search or conversational agent chatbots
    '';
    # Built from the version rather than `src.rev`: the latter is
    # "refs/tags/v<version>", which does not form a valid release-page URL.
    changelog = "https://github.com/deepset-ai/haystack/releases/tag/v${version}";
    homepage = "https://github.com/deepset-ai/haystack";
    license = lib.licenses.asl20;
    maintainers = with lib.maintainers; [ happysalada ];
    # https://github.com/deepset-ai/haystack/issues/5304
    broken = lib.versionAtLeast pydantic.version "2";
  };
}