{
  lib,
  aiohttp,
  buildPythonPackage,
  dataclasses-json,
  deprecated,
  dirtyjson,
  fetchFromGitHub,
  fetchzip,
  fsspec,
  llamaindex-py-client,
  nest-asyncio,
  networkx,
  nltk,
  numpy,
  openai,
  pandas,
  pillow,
  poetry-core,
  pytest-asyncio,
  pytest-mock,
  pytestCheckHook,
  pythonOlder,
  pyyaml,
  requests,
  sqlalchemy,
  tenacity,
  tiktoken,
  tree-sitter,
  typing-inspect,
}:

let
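  # NLTK data (the stopwords corpus and the punkt tokenizer) is fetched ahead of time
  # so it can be bundled into the package; see the comment above postPatch below.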
  stopwords = fetchzip {
    url = "https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/stopwords.zip";
    hash = "sha256-tX1CMxSvFjr0nnLxbbycaX/IBnzHFxljMZceX5zElPY=";
  };

  punkt = fetchzip {
    url = "https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/tokenizers/punkt.zip";
    hash = "sha256-SKZu26K17qMUg7iCFZey0GTECUZ+sTTrF/pqeEgJCos=";
  };
in

buildPythonPackage rec {
  pname = "llama-index-core";
  version = "0.10.36";
  pyproject = true;

  disabled = pythonOlder "3.8";

  src = fetchFromGitHub {
    owner = "run-llama";
    repo = "llama_index";
    rev = "refs/tags/v${version}";
    hash = "sha256-yP/60DLg43UOOogxbDvb1p5n8dnfBUjGhcfO5g5g0gA=";
  };

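  # The upstream repository is a monorepo; only the llama-index-core subdirectory is built here.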
  sourceRoot = "${src.name}/${pname}";

  # When `llama-index` is imported, it uses `nltk` to look for the following files and tries to
  # download them if they aren't present:
  # https://github.com/run-llama/llama_index/blob/6efa53cebd5c8ccf363582c932fffde44d61332e/llama-index-core/llama_index/core/utils.py#L59-L67
  # Setting `NLTK_DATA` to a writable path could also solve this, but it would have to be done in
  # every package that depends on `llama-index-core` to keep `pythonImportsCheck` from failing, so
  # bundling the data into the package is the more self-contained solution.
  postPatch = ''
    mkdir -p llama_index/core/_static/nltk_cache/corpora/stopwords/
    cp -r ${stopwords}/* llama_index/core/_static/nltk_cache/corpora/stopwords/

    mkdir -p llama_index/core/_static/nltk_cache/tokenizers/punkt/
    cp -r ${punkt}/* llama_index/core/_static/nltk_cache/tokenizers/punkt/
  '';
  build-system = [ poetry-core ];

  dependencies = [
    aiohttp
    dataclasses-json
    deprecated
    dirtyjson
    fsspec
    llamaindex-py-client
    nest-asyncio
    networkx
    nltk
    numpy
    openai
    pandas
    pillow
    pyyaml
    requests
    sqlalchemy
    tenacity
    tiktoken
    typing-inspect
  ];

  nativeCheckInputs = [
    pytest-asyncio
    pytest-mock
    pytestCheckHook
    tree-sitter
  ];

  pythonImportsCheck = [ "llama_index" ];

  disabledTestPaths = [
    # Tests require network access
    "tests/agent/"
    "tests/callbacks/"
    "tests/chat_engine/"
    "tests/evaluation/"
    "tests/indices/"
    "tests/ingestion/"
    "tests/memory/"
    "tests/node_parser/"
    "tests/objects/"
    "tests/playground/"
    "tests/postprocessor/"
    "tests/query_engine/"
    "tests/question_gen/"
    "tests/response_synthesizers/"
    "tests/retrievers/"
    "tests/selectors/"
    "tests/test_utils.py"
    "tests/text_splitter/"
    "tests/token_predictor/"
    "tests/tools/"
  ];

  meta = with lib; {
    description = "Data framework for your LLM applications";
    homepage = "https://github.com/run-llama/llama_index/";
    changelog = "https://github.com/run-llama/llama_index/blob/v${version}/CHANGELOG.md";
    license = licenses.mit;
    maintainers = with maintainers; [ fab ];
  };
}