python310Packages.unstructured: init at 0.8.1

authored by

happysalada and committed by
Yt
35f43f95 8c82245e

+145
+143
pkgs/development/python-modules/unstructured/default.nix
{ lib
, buildPythonPackage
, fetchFromGitHub
# propagated build inputs
, chardet
, filetype
, lxml
, msg-parser
, nltk
, openpyxl
, pandas
, pdf2image
, pdfminer-six
, pillow
, pypandoc
, python-docx
, python-pptx
, python-magic
, markdown
, requests
, tabulate
, xlrd
# optional-dependencies
, langdetect
, sacremoses
, sentencepiece
, torch
, transformers
, unstructured-inference
, s3fs
, fsspec
, adlfs
# , discord-py
, pygithub
, python-gitlab
, praw
, slack-sdk
, wikipedia
, google-api-python-client
# , gcsfs
, elasticsearch8
, jq
# , dropboxdrivefs
, atlassian-python-api
# test dependencies
, pytestCheckHook
, black
, coverage
, click
, freezegun
# , label-studio-sdk
, mypy
, pytest-cov
, pytest-mock
, vcrpy
, grpcio
}:
let
  version = "0.8.1";

  # Optional feature groups, exposed to consumers through
  # `passthru.optional-dependencies` below.
  # NOTE(review): keys are intended to mirror the extras declared by
  # upstream's setup.py -- confirm when bumping the version.
  # Groups whose dependencies are commented out in the argument list are
  # kept as empty lists, with the missing packages named in a trailing
  # comment.
  optional-dependencies = {
    huggingface = [
      langdetect
      sacremoses
      sentencepiece
      torch
      transformers
    ];
    local-inference = [ unstructured-inference ];
    s3 = [ s3fs fsspec ];
    azure = [ adlfs fsspec ];
    discord = [ ]; # discord-py
    github = [ pygithub ];
    gitlab = [ python-gitlab ];
    reddit = [ praw ];
    slack = [ slack-sdk ];
    wikipedia = [ wikipedia ];
    google-drive = [ google-api-python-client ];
    gcs = [ ]; # gcsfs fsspec
    elasticsearch = [ elasticsearch8 jq ];
    dropbox = [ ]; # dropboxdrivefs fsspec
    confluence = [ atlassian-python-api ];
  };
in
buildPythonPackage {
  pname = "unstructured";
  inherit version;
  format = "setuptools";

  src = fetchFromGitHub {
    owner = "Unstructured-IO";
    repo = "unstructured";
    # The version string is used verbatim as the git revision.
    rev = version;
    hash = "sha256-I9pRycg3uGn7Xfd4YGxic16SXi8+gslsIVarzDT8X2w=";
  };

  propagatedBuildInputs = [
    chardet
    filetype
    lxml
    msg-parser
    nltk
    openpyxl
    pandas
    pdf2image
    pdfminer-six
    pillow
    pypandoc
    python-docx
    python-pptx
    python-magic
    markdown
    requests
    tabulate
    xlrd
  ];

  pythonImportsCheck = [ "unstructured" ];

  # The tests try to download punkt from nltk.
  # Figure out how to make it available in order to enable the tests.
  doCheck = false;

  # Only takes effect once doCheck is enabled.
  nativeCheckInputs = [
    pytestCheckHook
    black
    coverage
    click
    freezegun
    mypy
    pytest-cov
    pytest-mock
    vcrpy
    grpcio
  ];

  # Without this the `optional-dependencies` binding above is dead code and
  # the extras cannot be selected by dependents.
  passthru.optional-dependencies = optional-dependencies;

  meta = with lib; {
    description = "Open source libraries and APIs to build custom preprocessing pipelines for labeling, training, or production machine learning pipelines";
    homepage = "https://github.com/Unstructured-IO/unstructured";
    changelog = "https://github.com/Unstructured-IO/unstructured/blob/${version}/CHANGELOG.md";
    license = licenses.asl20;
    maintainers = with maintainers; [ happysalada ];
  };
}
+2
pkgs/top-level/python-packages.nix
# Python package set entries; the visible neighbours are in alphabetical order.
unrpa = callPackage ../development/python-modules/unrpa { };

unstructured = callPackage ../development/python-modules/unstructured { };

unstructured-inference = callPackage ../development/python-modules/unstructured-inference { };

untangle = callPackage ../development/python-modules/untangle { };