# Package expression for unstructured-api: copies the upstream source tree
# into $out/lib and installs a `unstructured-api` launcher that runs the
# `prepline_general.api.app:app` application under uvicorn.
{
  lib,
  stdenvNoCC,
  fetchFromGitHub,
  python3,
  makeWrapper,
  nix-update-script,
  symlinkJoin,
  nltk-data,
}:
let
  # Python environment providing uvicorn plus the libraries listed below,
  # extended with the `local-inference` optional dependencies of
  # `unstructured`.
  pythonEnv = python3.withPackages (
    packages:
    with packages;
    [
      unstructured-api-tools
      unstructured
      pydantic
      click
      ratelimit
      requests
      pypdf
      pycryptodome
      safetensors
      uvicorn
    ]
    ++ packages.unstructured.optional-dependencies.local-inference
  );

  version = "0.0.42";

  # Single directory merging the two NLTK datasets; the wrapper exports it
  # as NLTK_DATA.
  unstructured_api_nltk_data = symlinkJoin {
    name = "unstructured_api_nltk_data";

    paths = [
      nltk-data.punkt
      nltk-data.averaged_perceptron_tagger
    ];
  };
in
stdenvNoCC.mkDerivation {
  pname = "unstructured-api";
  inherit version;

  src = fetchFromGitHub {
    owner = "Unstructured-IO";
    repo = "unstructured-api";
    rev = version;
    hash = "sha256-Tn4o7gAIlvWUTbAmbTCF9LgMk0up16iWuNPTy6pxOuk=";
  };

  nativeBuildInputs = [ makeWrapper ];

  # The source tree is copied verbatim to $out/lib, and that directory is
  # prepended to PYTHONPATH so the module path passed to uvicorn
  # (`prepline_general.api.app:app`) can be imported from it.
  installPhase = ''
    runHook preInstall

    mkdir -p $out $out/bin $out/lib
    cp -r . $out/lib

    makeWrapper ${pythonEnv}/bin/uvicorn $out/bin/unstructured-api \
      --set NLTK_DATA ${unstructured_api_nltk_data} \
      --prefix PYTHONPATH : $out/lib \
      --add-flags "prepline_general.api.app:app"

    runHook postInstall
  '';

  passthru.updateScript = nix-update-script { };

  meta = {
    description = "open-source toolkit designed to make it easy to prepare unstructured data like PDFs, HTML and Word Documents for downstream data science tasks";
    homepage = "https://github.com/Unstructured-IO/unstructured-api";
    changelog = "https://github.com/Unstructured-IO/unstructured-api/releases/tag/${version}";
    license = lib.licenses.asl20;
    maintainers = with lib.maintainers; [ happysalada ];
    # Name of the only executable installed by installPhase; lets
    # `lib.getExe` and `nix run` resolve it without guessing.
    mainProgram = "unstructured-api";
  };
}