{
  lib,
  aiohttp,
  buildPythonPackage,
  dataclasses-json,
  deprecated,
  dirtyjson,
  fetchFromGitHub,
  fetchzip,
  fsspec,
  jsonpath-ng,
  llamaindex-py-client,
  nest-asyncio,
  networkx,
  nltk,
  numpy,
  openai,
  pandas,
  pillow,
  poetry-core,
  pytest-asyncio,
  pytest-mock,
  pytestCheckHook,
  pythonOlder,
  pyyaml,
  requests,
  spacy,
  sqlalchemy,
  tenacity,
  tiktoken,
  tree-sitter,
  typing-inspect,
}:

let
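  # NLTK data packages vendored at build time so that importing `llama_index` does not
  # attempt to download them at runtime; see the note above `postPatch` below.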
  stopwords = fetchzip {
    url = "https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/stopwords.zip";
    hash = "sha256-tX1CMxSvFjr0nnLxbbycaX/IBnzHFxljMZceX5zElPY=";
  };

  punkt = fetchzip {
    url = "https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/tokenizers/punkt.zip";
    hash = "sha256-SKZu26K17qMUg7iCFZey0GTECUZ+sTTrF/pqeEgJCos=";
  };
in

buildPythonPackage rec {
  pname = "llama-index-core";
  version = "0.10.48.post1";
  pyproject = true;

  disabled = pythonOlder "3.8";

  src = fetchFromGitHub {
    owner = "run-llama";
    repo = "llama_index";
    rev = "refs/tags/v${version}";
    hash = "sha256-O8mHttYMRUzWvhydQsOux7tynhDuMKapsSDJQXL0MRQ=";
  };

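  # The upstream repository is a monorepo; only the llama-index-core subdirectory is built here.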
  sourceRoot = "${src.name}/${pname}";

  # When `llama-index` is imported, it uses `nltk` to look for the following files and tries to
  # download them if they aren't present.
  # https://github.com/run-llama/llama_index/blob/6efa53cebd5c8ccf363582c932fffde44d61332e/llama-index-core/llama_index/core/utils.py#L59-L67
  # Setting `NLTK_DATA` to a writable path would also solve this, but it would have to be done in
  # every package that depends on `llama-index-core` for `pythonImportsCheck` to pass, so this
  # approach is more self-contained.
  postPatch = ''
    mkdir -p llama_index/core/_static/nltk_cache/corpora/stopwords/
    cp -r ${stopwords}/* llama_index/core/_static/nltk_cache/corpora/stopwords/

    mkdir -p llama_index/core/_static/nltk_cache/tokenizers/punkt/
    cp -r ${punkt}/* llama_index/core/_static/nltk_cache/tokenizers/punkt/
  '';

  build-system = [ poetry-core ];

  dependencies = [
    aiohttp
    dataclasses-json
    deprecated
    dirtyjson
    fsspec
    jsonpath-ng
    llamaindex-py-client
    nest-asyncio
    networkx
    nltk
    numpy
    openai
    pandas
    pillow
    pyyaml
    requests
    spacy
    sqlalchemy
    tenacity
    tiktoken
    typing-inspect
  ];

  nativeCheckInputs = [
    tree-sitter
    pytest-asyncio
    pytest-mock
    pytestCheckHook
  ];

  pythonImportsCheck = [ "llama_index" ];

  disabledTestPaths = [
    # Tests require network access
    "tests/agent/"
    "tests/callbacks/"
    "tests/chat_engine/"
    "tests/evaluation/"
    "tests/indices/"
    "tests/ingestion/"
    "tests/memory/"
    "tests/node_parser/"
    "tests/objects/"
    "tests/playground/"
    "tests/postprocessor/"
    "tests/query_engine/"
    "tests/question_gen/"
    "tests/response_synthesizers/"
    "tests/retrievers/"
    "tests/selectors/"
    "tests/test_utils.py"
    "tests/text_splitter/"
    "tests/token_predictor/"
    "tests/tools/"
  ];

  meta = with lib; {
    description = "Data framework for your LLM applications";
    homepage = "https://github.com/run-llama/llama_index/";
    changelog = "https://github.com/run-llama/llama_index/blob/v${version}/CHANGELOG.md";
    license = licenses.mit;
    maintainers = with maintainers; [ fab ];
  };
}