Clone of https://github.com/NixOS/nixpkgs.git (to stress-test knotserver)
at devShellTools-shell 191 lines 3.7 kB view raw
# Package expression for unstructured-api.
#
# The upstream source tree is copied verbatim into $out/lib and exposed through
# a wrapper script, $out/bin/unstructured-api, that launches uvicorn from a
# dedicated Python environment with the ASGI application
# `prepline_general.api.app:app`.
{
  lib,
  stdenvNoCC,
  fetchFromGitHub,
  python3,
  makeWrapper,
  nix-update-script,
}:
let
  # Python interpreter bundled with every module listed below, plus the `grpc`
  # optional dependencies of google-api-core and the `all-docs` optional
  # dependencies of unstructured.
  pythonEnv = python3.withPackages (
    packages:
    with packages;
    [
      aiofiles
      annotated-types
      antlr4-python3-runtime
      anyio
      backoff
      beautifulsoup4
      cachetools
      certifi
      cffi
      chardet
      charset-normalizer
      click
      coloredlogs
      contourpy
      cryptography
      cycler
      dataclasses-json
      deprecated
      effdet
      emoji
      et-xmlfile
      eval-type-backport
      fastapi
      filelock
      filetype
      flatbuffers
      fonttools
      fsspec
      google-api-core
      google-auth
      google-cloud-vision
      googleapis-common-protos
      grpcio
      grpcio-status
      h11
      html5lib
      httpcore
      httpx
      huggingface-hub
      humanfriendly
      idna
      iopath
      jinja2
      joblib
      jsonpath
      kiwisolver
      langdetect
      layoutparser
      lxml
      markdown
      markupsafe
      marshmallow
      matplotlib
      mpmath
      mypy-extensions
      nest-asyncio
      networkx
      nltk
      numpy
      olefile
      omegaconf
      onnx
      onnxruntime
      opencv-python
      openpyxl
      packaging
      pandas
      pdf2image
      pdfminer-six
      pdfplumber
      # NOTE(review): the commented-out entries in this list are disabled in
      # the original; presumably they are not available in the Python package
      # set -- TODO confirm before re-enabling.
      # pi-heif
      pikepdf
      pillow
      portalocker
      proto-plus
      protobuf
      psutil
      pyasn1
      pyasn1-modules
      pycocotools
      pycparser
      pycryptodome
      pydantic
      pydantic-core
      pypandoc
      pyparsing
      pypdf
      pypdfium2
      python-dateutil
      python-docx
      # python-iso639
      python-magic
      python-multipart
      # python-oxmsg
      python-pptx
      pytz
      pyyaml
      rapidfuzz
      ratelimit
      regex
      requests
      requests-toolbelt
      rsa
      safetensors
      scipy
      six
      sniffio
      soupsieve
      starlette
      sympy
      timm
      tokenizers
      torch
      torchvision
      tqdm
      transformers
      typing-extensions
      typing-inspect
      tzdata
      unstructured
      # unstructured-client
      unstructured-inference
      # unstructured-pytesseract
      urllib3
      uvicorn
      webencodings
      wrapt
      xlrd
      xlsxwriter
    ]
    ++ google-api-core.optional-dependencies.grpc
    ++ unstructured.optional-dependencies.all-docs
  );
  version = "0.0.89";
  # NLTK data directory holding the `punkt` and `averaged-perceptron-tagger`
  # datasets; the wrapper exports its path as NLTK_DATA.
  unstructured_api_nltk_data = python3.pkgs.nltk.dataDir (d: [
    d.punkt
    d.averaged-perceptron-tagger
  ]);
in
stdenvNoCC.mkDerivation {
  pname = "unstructured-api";
  inherit version;

  src = fetchFromGitHub {
    owner = "Unstructured-IO";
    repo = "unstructured-api";
    rev = version;
    hash = "sha256-FxWOR13wZwowZny2t4Frwl+cLMv+6nkHxQm9Xc4Y9Kw=";
  };

  nativeBuildInputs = [ makeWrapper ];

  # Copy the whole source tree into $out/lib, then create a wrapper around the
  # environment's uvicorn that sets NLTK_DATA, puts $out/lib on PYTHONPATH so
  # the copied sources are importable, and passes the application path as an
  # argument.
  installPhase = ''
    runHook preInstall

    mkdir -p $out $out/bin $out/lib
    cp -r . $out/lib

    makeWrapper ${pythonEnv}/bin/uvicorn $out/bin/unstructured-api \
      --set NLTK_DATA ${unstructured_api_nltk_data} \
      --prefix PYTHONPATH : $out/lib \
      --add-flags "prepline_general.api.app:app"

    runHook postInstall
  '';

  passthru = {
    updateScript = nix-update-script { };
  };

  meta = {
    description = "Open-source toolkit designed to make it easy to prepare unstructured data like PDFs, HTML and Word Documents for downstream data science tasks";
    homepage = "https://github.com/Unstructured-IO/unstructured-api";
    changelog = "https://github.com/Unstructured-IO/unstructured-api/releases/tag/${version}";
    license = lib.licenses.asl20;
    maintainers = with lib.maintainers; [ happysalada ];
    # The only executable installed is the wrapper created in installPhase;
    # naming it here lets `lib.getExe` and `nix run` find it explicitly.
    mainProgram = "unstructured-api";
  };
}