{
  lib,
  aiohttp,
  buildPythonPackage,
  dataclasses-json,
  deprecated,
  dirtyjson,
  fetchFromGitHub,
  filetype,
  fsspec,
  jsonpath-ng,
  llamaindex-py-client,
  nest-asyncio,
  networkx,
  nltk,
  nltk-data,
  numpy,
  openai,
  pandas,
  pillow,
  poetry-core,
  pytest-asyncio,
  pytest-mock,
  pytestCheckHook,
  pythonOlder,
  pyvis,
  pyyaml,
  requests,
  spacy,
  sqlalchemy,
  tenacity,
  tiktoken,
  tree-sitter,
  typing-inspect,
}:

buildPythonPackage rec {
  pname = "llama-index-core";
  version = "0.12.23";
  pyproject = true;

  disabled = pythonOlder "3.9";

  src = fetchFromGitHub {
    owner = "run-llama";
    repo = "llama_index";
    tag = "v${version}";
    hash = "sha256-GFzaorzjeQGreyUjRXP7v7djbSq2boLWZjwO4R2W9E4=";
  };

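  # The upstream repository is a monorepo; the core package lives in its own
  # subdirectory, whose name matches pname.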
  sourceRoot = "${src.name}/${pname}";

  # When `llama-index` is imported, it uses `nltk` to look for the following files and tries to
  # download them if they aren't present.
  # https://github.com/run-llama/llama_index/blob/6efa53cebd5c8ccf363582c932fffde44d61332e/llama-index-core/llama_index/core/utils.py#L59-L67
  # Setting `NLTK_DATA` to a writable path can also solve this problem, but it needs to be done in
  # every package that depends on `llama-index-core` for `pythonImportsCheck` not to fail, so this
  # solution seems more elegant.
  postPatch = ''
    mkdir -p llama_index/core/_static/nltk_cache/corpora/stopwords/
    cp -r ${nltk-data.stopwords}/corpora/stopwords/* llama_index/core/_static/nltk_cache/corpora/stopwords/

    mkdir -p llama_index/core/_static/nltk_cache/tokenizers/punkt/
    cp -r ${nltk-data.punkt}/tokenizers/punkt/* llama_index/core/_static/nltk_cache/tokenizers/punkt/
  '';

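  # Upstream pins tenacity to a range that conflicts with the version packaged
  # in nixpkgs, so the constraint is relaxed here.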
  pythonRelaxDeps = [ "tenacity" ];

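  # Upstream uses Poetry, so build with the poetry-core backend.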
  build-system = [ poetry-core ];

  dependencies = [
    aiohttp
    dataclasses-json
    deprecated
    dirtyjson
    filetype
    fsspec
    jsonpath-ng
    llamaindex-py-client
    nest-asyncio
    networkx
    nltk
    numpy
    openai
    pandas
    pillow
    pyvis
    pyyaml
    requests
    spacy
    sqlalchemy
    tenacity
    tiktoken
    typing-inspect
  ];

  nativeCheckInputs = [
    tree-sitter
    pytest-asyncio
    pytest-mock
    pytestCheckHook
  ];

  pythonImportsCheck = [ "llama_index" ];

  disabledTestPaths = [
    # Tests require network access
    "tests/agent/"
    "tests/callbacks/"
    "tests/chat_engine/"
    "tests/evaluation/"
    "tests/indices/"
    "tests/ingestion/"
    "tests/memory/"
    "tests/node_parser/"
    "tests/objects/"
    "tests/playground/"
    "tests/postprocessor/"
    "tests/query_engine/"
    "tests/question_gen/"
    "tests/response_synthesizers/"
    "tests/retrievers/"
    "tests/selectors/"
    "tests/test_utils.py"
    "tests/text_splitter/"
    "tests/token_predictor/"
    "tests/tools/"
  ];

  disabledTests = [
    # Tests require network access
    "test_from_namespaced_persist_dir"
    "test_from_persist_dir"
    "test_context_extraction_basic"
    "test_context_extraction_oversized_document"
    "test_context_extraction_custom_prompt"
    "test_multiple_documents_context"
    "test_mimetype_raw_data"
    "test_SimpleDirectoryReader"
    # asyncio.exceptions.InvalidStateError: invalid state
    "test_workflow_context_to_dict_mid_run"
  ];

  meta = {
    description = "Data framework for your LLM applications";
    homepage = "https://github.com/run-llama/llama_index/";
    changelog = "https://github.com/run-llama/llama_index/blob/${src.tag}/CHANGELOG.md";
    license = lib.licenses.mit;
    maintainers = with lib.maintainers; [ fab ];
  };
}