{
  lib,
  aiohttp,
  buildPythonPackage,
  dataclasses-json,
  deprecated,
  dirtyjson,
  fetchFromGitHub,
  fetchzip,
  fsspec,
  llamaindex-py-client,
  nest-asyncio,
  networkx,
  nltk,
  numpy,
  openai,
  pandas,
  pillow,
  poetry-core,
  pytest-asyncio,
  pytest-mock,
  pytestCheckHook,
  pythonOlder,
  pyyaml,
  requests,
  sqlalchemy,
  tenacity,
  tiktoken,
  tree-sitter,
  typing-inspect,
}:

let
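  # NLTK data (the stopwords corpus and the punkt tokenizer) is fetched ahead of time
  # so it can be bundled into the package; see the comment above postPatch below.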
  stopwords = fetchzip {
    url = "https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/stopwords.zip";
    hash = "sha256-tX1CMxSvFjr0nnLxbbycaX/IBnzHFxljMZceX5zElPY=";
  };

  punkt = fetchzip {
    url = "https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/tokenizers/punkt.zip";
    hash = "sha256-SKZu26K17qMUg7iCFZey0GTECUZ+sTTrF/pqeEgJCos=";
  };
in

buildPythonPackage rec {
  pname = "llama-index-core";
  version = "0.10.36";
  pyproject = true;

  disabled = pythonOlder "3.8";

  src = fetchFromGitHub {
    owner = "run-llama";
    repo = "llama_index";
    rev = "refs/tags/v${version}";
    hash = "sha256-yP/60DLg43UOOogxbDvb1p5n8dnfBUjGhcfO5g5g0gA=";
  };

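  # The upstream repository is a monorepo; only the llama-index-core subdirectory is built here.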
  sourceRoot = "${src.name}/${pname}";

  # When `llama-index` is imported, it uses `nltk` to look for the following files and tries to
  # download them if they aren't present:
  # https://github.com/run-llama/llama_index/blob/6efa53cebd5c8ccf363582c932fffde44d61332e/llama-index-core/llama_index/core/utils.py#L59-L67
  # Setting `NLTK_DATA` to a writable path could also solve this, but it would have to be done in
  # every package that depends on `llama-index-core` to keep `pythonImportsCheck` from failing, so
  # bundling the data into the package is the more self-contained solution.
  postPatch = ''
    mkdir -p llama_index/core/_static/nltk_cache/corpora/stopwords/
    cp -r ${stopwords}/* llama_index/core/_static/nltk_cache/corpora/stopwords/

    mkdir -p llama_index/core/_static/nltk_cache/tokenizers/punkt/
    cp -r ${punkt}/* llama_index/core/_static/nltk_cache/tokenizers/punkt/
  '';
  build-system = [ poetry-core ];

  dependencies = [
    aiohttp
    dataclasses-json
    deprecated
    dirtyjson
    fsspec
    llamaindex-py-client
    nest-asyncio
    networkx
    nltk
    numpy
    openai
    pandas
    pillow
    pyyaml
    requests
    sqlalchemy
    tenacity
    tiktoken
    typing-inspect
  ];

  nativeCheckInputs = [
    pytest-asyncio
    pytest-mock
    pytestCheckHook
    tree-sitter
  ];

  pythonImportsCheck = [ "llama_index" ];

  disabledTestPaths = [
    # Tests require network access
    "tests/agent/"
    "tests/callbacks/"
    "tests/chat_engine/"
    "tests/evaluation/"
    "tests/indices/"
    "tests/ingestion/"
    "tests/memory/"
    "tests/node_parser/"
    "tests/objects/"
    "tests/playground/"
    "tests/postprocessor/"
    "tests/query_engine/"
    "tests/question_gen/"
    "tests/response_synthesizers/"
    "tests/retrievers/"
    "tests/selectors/"
    "tests/test_utils.py"
    "tests/text_splitter/"
    "tests/token_predictor/"
    "tests/tools/"
  ];

  meta = with lib; {
    description = "Data framework for your LLM applications";
    homepage = "https://github.com/run-llama/llama_index/";
    changelog = "https://github.com/run-llama/llama_index/blob/v${version}/CHANGELOG.md";
    license = licenses.mit;
    maintainers = with maintainers; [ fab ];
  };
}