{ lib
, buildPythonPackage
, pythonOlder
, fetchFromGitHub
, pythonRelaxDepsHook

# propagated build input
, faiss
, torch
, transformers
, huggingface-hub
, numpy
, pyyaml
, regex

# optional-dependencies
, aiohttp
, fastapi
, uvicorn
# TODO add apache-libcloud
# , apache-libcloud
, rich
, duckdb
, pillow
, networkx
, python-louvain
, onnx
, onnxruntime
, soundfile
, scipy
, ttstokenizer
, beautifulsoup4
, nltk
, pandas
, tika
, imagehash
, timm
, fasttext
, sentencepiece
, accelerate
, onnxmltools
, annoy
, hnswlib
# TODO add pymagnitude-lite
#, pymagnitude-lite
, scikit-learn
, sentence-transformers
, croniter
, openpyxl
, requests
, xmltodict

# native check inputs
, unittestCheckHook
}:
let
  version = "6.2.0";
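
  # Optional dependency groups; they roughly mirror the extras declared in
  # upstream's setup.py and are exposed below via passthru.optional-dependencies.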
  api = [ aiohttp fastapi uvicorn ];
  # cloud = [ apache-libcloud ];
  console = [ rich ];

  database = [ duckdb pillow ];

  graph = [ networkx python-louvain ];

  model = [ onnx onnxruntime ];

  pipeline-audio = [ onnx onnxruntime soundfile scipy ttstokenizer ];
  pipeline-data = [ beautifulsoup4 nltk pandas tika ];
  pipeline-image = [ imagehash pillow timm ];
  pipeline-text = [ fasttext sentencepiece ];
  pipeline-train = [ accelerate onnx onnxmltools onnxruntime ];
  pipeline = pipeline-audio ++ pipeline-data ++ pipeline-image ++ pipeline-text ++ pipeline-train;

  similarity = [
    annoy
    fasttext
    hnswlib
    # pymagnitude-lite
    scikit-learn
    sentence-transformers
  ];

  workflow = [
    # apache-libcloud
    croniter
    openpyxl
    pandas
    pillow
    requests
    xmltodict
  ];

  all = api ++ console ++ database ++ graph ++ model ++ pipeline ++ similarity ++ workflow;
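
  # Collected into one attribute set so it can be exposed as
  # passthru.optional-dependencies below, e.g. txtai.optional-dependencies.api.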
  optional-dependencies = {
    inherit api console database graph model pipeline-audio pipeline-data
      pipeline-image pipeline-text pipeline-train pipeline similarity workflow all;
  };
in
buildPythonPackage {
  pname = "txtai";
  inherit version;
  format = "setuptools";

  disabled = pythonOlder "3.8";

  src = fetchFromGitHub {
    owner = "neuml";
    repo = "txtai";
    rev = "refs/tags/v${version}";
    hash = "sha256-aWuY2z5DIVhZ5bRADhKSadCofIQQdLQAb52HnjPMS/4=";
  };
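
  # pythonRelaxDepsHook makes the pythonRemoveDeps attribute below take effect
  # on the built wheel's metadata.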
  nativeBuildInputs = [
    pythonRelaxDepsHook
  ];

  pythonRemoveDeps = [
    # We call it faiss, not faiss-cpu.
    "faiss-cpu"
  ];

  propagatedBuildInputs = [
    faiss
    torch
    transformers
    huggingface-hub
    numpy
    pyyaml
    regex
  ];

  passthru.optional-dependencies = optional-dependencies;

  pythonImportsCheck = [ "txtai" ];

  # some tests hang forever
  doCheck = false;
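
  # The build sandbox has no writable $HOME, so point the transformers model
  # cache at a temporary directory for the (currently disabled) tests.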
  preCheck = ''
    export TRANSFORMERS_CACHE=$(mktemp -d)
  '';
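
  # The test suite additionally exercises the api and similarity extras.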
  nativeCheckInputs = [
    unittestCheckHook
  ] ++ optional-dependencies.api ++ optional-dependencies.similarity;
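
  # Forwarded to `python -m unittest discover` by unittestCheckHook:
  # start discovery in test/python and report verbosely.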
  unittestFlagsArray = [
    "-s" "test/python" "-v"
  ];

  meta = with lib; {
    description = "Semantic search and workflows powered by language models";
    changelog = "https://github.com/neuml/txtai/releases/tag/v${version}";
    homepage = "https://github.com/neuml/txtai";
    license = licenses.asl20;
    maintainers = with maintainers; [ happysalada ];
  };
}