# nixpkgs derivation for llama-index-core (captured from nixpkgs at 24.11-pre)
{
  lib,
  aiohttp,
  buildPythonPackage,
  dataclasses-json,
  deprecated,
  dirtyjson,
  fetchFromGitHub,
  fetchzip,
  fsspec,
  llamaindex-py-client,
  nest-asyncio,
  networkx,
  nltk,
  numpy,
  openai,
  pandas,
  pillow,
  poetry-core,
  pytest-asyncio,
  pytest-mock,
  pytestCheckHook,
  pythonOlder,
  pyyaml,
  requests,
  sqlalchemy,
  tenacity,
  tiktoken,
  tree-sitter,
  typing-inspect,
}:

let
  # NLTK corpora fetched at eval time so the sandboxed build never needs
  # network access (see the postPatch comment below for why they are vendored).
  stopwords = fetchzip {
    url = "https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/stopwords.zip";
    hash = "sha256-tX1CMxSvFjr0nnLxbbycaX/IBnzHFxljMZceX5zElPY=";
  };

  punkt = fetchzip {
    url = "https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/tokenizers/punkt.zip";
    hash = "sha256-SKZu26K17qMUg7iCFZey0GTECUZ+sTTrF/pqeEgJCos=";
  };
in

buildPythonPackage rec {
  pname = "llama-index-core";
  version = "0.10.36";
  pyproject = true;

  disabled = pythonOlder "3.8";

  src = fetchFromGitHub {
    owner = "run-llama";
    repo = "llama_index";
    rev = "refs/tags/v${version}";
    hash = "sha256-yP/60DLg43UOOogxbDvb1p5n8dnfBUjGhcfO5g5g0gA=";
  };

  # The repository is a monorepo; build only the llama-index-core subdirectory.
  sourceRoot = "${src.name}/${pname}";

  # When `llama-index` is imported, it uses `nltk` to look for the following files and tries to
  # download them if they aren't present.
  # https://github.com/run-llama/llama_index/blob/6efa53cebd5c8ccf363582c932fffde44d61332e/llama-index-core/llama_index/core/utils.py#L59-L67
  # Setting `NLTK_DATA` to a writable path can also solve this problem, but it needs to be done in
  # every package that depends on `llama-index-core` for `pythonImportsCheck` not to fail, so this
  # solution seems more elegant.
  #
  # Note: use `postPatch`, not `patchPhase` — overriding `patchPhase` would skip
  # the default phase entirely, silently dropping any `patches` and patch hooks.
  postPatch = ''
    mkdir -p llama_index/core/_static/nltk_cache/corpora/stopwords/
    cp -r ${stopwords}/* llama_index/core/_static/nltk_cache/corpora/stopwords/

    mkdir -p llama_index/core/_static/nltk_cache/tokenizers/punkt/
    cp -r ${punkt}/* llama_index/core/_static/nltk_cache/tokenizers/punkt/
  '';

  build-system = [ poetry-core ];

  dependencies = [
    aiohttp
    dataclasses-json
    deprecated
    dirtyjson
    fsspec
    llamaindex-py-client
    nest-asyncio
    networkx
    nltk
    numpy
    openai
    pandas
    pillow
    pyyaml
    requests
    sqlalchemy
    tenacity
    tiktoken
    typing-inspect
  ];

  nativeCheckInputs = [
    tree-sitter
    pytest-asyncio
    pytest-mock
    pytestCheckHook
  ];

  pythonImportsCheck = [ "llama_index" ];

  disabledTestPaths = [
    # Tests require network access
    "tests/agent/"
    "tests/callbacks/"
    "tests/chat_engine/"
    "tests/evaluation/"
    "tests/indices/"
    "tests/ingestion/"
    "tests/memory/"
    "tests/node_parser/"
    "tests/objects/"
    "tests/playground/"
    "tests/postprocessor/"
    "tests/query_engine/"
    "tests/question_gen/"
    "tests/response_synthesizers/"
    "tests/retrievers/"
    "tests/selectors/"
    "tests/test_utils.py"
    "tests/text_splitter/"
    "tests/token_predictor/"
    "tests/tools/"
  ];

  meta = with lib; {
    description = "Data framework for your LLM applications";
    homepage = "https://github.com/run-llama/llama_index/";
    # Tags in this repository are v-prefixed (cf. `rev` above), so the
    # changelog link must include the `v` or it points at a nonexistent ref.
    changelog = "https://github.com/run-llama/llama_index/blob/v${version}/CHANGELOG.md";
    license = licenses.mit;
    maintainers = with maintainers; [ fab ];
  };
}