# Python package expression for unstructured-inference, built from the
# upstream GitHub tag matching `version`.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  # runtime dependencies
  layoutparser,
  python-multipart,
  huggingface-hub,
  opencv,
  onnxruntime,
  transformers,
  detectron2,
  paddleocr,
  # check inputs
  pytestCheckHook,
  coverage,
  click,
  httpx,
  mypy,
  pytest-cov,
  pdf2image,
}:

buildPythonPackage rec {
  pname = "unstructured-inference";
  version = "0.7.24";
  format = "setuptools";

  src = fetchFromGitHub {
    owner = "Unstructured-IO";
    repo = "unstructured-inference";
    rev = "refs/tags/${version}";
    hash = "sha256-AxQHTUgE4CyiinT7HEh6fvbw+uVi7lKUgfOc1KZOezU=";
  };

  # Rewrite the requirement name "opencv-python" to "opencv" in the upstream
  # requirements file. `--replace-fail` aborts the build if the pattern is no
  # longer present, so a stale patch is noticed on the next version bump
  # instead of being skipped silently.
  postPatch = ''
    substituteInPlace requirements/base.in \
      --replace-fail "opencv-python" "opencv"
  '';

  propagatedBuildInputs =
    [
      layoutparser
      python-multipart
      huggingface-hub
      opencv
      onnxruntime
      transformers
      detectron2
      paddleocr
      # yolox is intentionally not listed; the tests that need it are
      # disabled below.
    ]
    # Pull in layoutparser's optional "layoutmodels" and "tesseract" extras.
    ++ layoutparser.optional-dependencies.layoutmodels
    ++ layoutparser.optional-dependencies.tesseract;

  # huggingface-hub is not repeated here: it is already part of
  # propagatedBuildInputs above.
  nativeCheckInputs = [
    pytestCheckHook
    coverage
    click
    httpx
    mypy
    pytest-cov
    pdf2image
  ];

  # Point HOME at a fresh temporary directory for the test run.
  # NOTE(review): presumably some dependency writes cache/config files under
  # HOME - confirm which one.
  preCheck = ''
    export HOME=$(mktemp -d)
  '';

  disabledTests = [
    # not sure why this fails
    "test_get_path_oob_move_deeply_nested"
    "test_get_path_oob_move_nested[False]"
    # requires yolox
    "test_yolox"
  ];

  disabledTestPaths = [
    # network access
    "test_unstructured_inference/inference/test_layout.py"
    "test_unstructured_inference/models/test_chippermodel.py"
    "test_unstructured_inference/models/test_detectron2.py"
    "test_unstructured_inference/models/test_detectron2onnx.py"
    # unclear failure
    "test_unstructured_inference/models/test_donut.py"
    "test_unstructured_inference/models/test_model.py"
    "test_unstructured_inference/models/test_tables.py"
  ];

  pythonImportsCheck = [ "unstructured_inference" ];

  meta = {
    description = "Hosted model inference code for layout parsing models";
    homepage = "https://github.com/Unstructured-IO/unstructured-inference";
    changelog = "https://github.com/Unstructured-IO/unstructured-inference/blob/${src.rev}/CHANGELOG.md";
    license = lib.licenses.asl20;
    maintainers = with lib.maintainers; [ happysalada ];
    # NOTE(review): aarch64-linux is not listed; the reason is not recorded
    # here - confirm whether it is still unsupported.
    platforms = [
      "x86_64-linux"
      "x86_64-darwin"
      "aarch64-darwin"
    ];
  };
}