# Function arguments: `lib`, the builder and fetcher, and the Python packages
# referenced by the derivation below.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  # runtime dependencies
  layoutparser,
  python-multipart,
  huggingface-hub,
  opencv,
  onnxruntime,
  transformers,
  detectron2,
  paddleocr,
  # check inputs
  pytestCheckHook,
  coverage,
  click,
  httpx,
  mypy,
  pytest-cov,
  pdf2image,
}:
22
# Builds the `unstructured-inference` Python package from its GitHub tag.
# `rec` is required because `src` refers to `version` and `meta.changelog`
# refers to `src.rev`.
buildPythonPackage rec {
  pname = "unstructured-inference";
  version = "0.7.11";
  format = "setuptools";

  src = fetchFromGitHub {
    owner = "Unstructured-IO";
    repo = "unstructured-inference";
    rev = "refs/tags/${version}";
    hash = "sha256-cUd1umD61xHPehutBh5pUWTLyOdn3vbgerRQmsOpuDM=";
  };

  # Rename the `opencv-python` requirement to `opencv` in the upstream
  # requirements file.
  # NOTE(review): presumably so the requirement matches the distribution name
  # of the `opencv` package passed in above -- confirm.
  postPatch = ''
    substituteInPlace requirements/base.in \
      --replace "opencv-python" "opencv"
  '';

  propagatedBuildInputs =
    [
      layoutparser
      python-multipart
      huggingface-hub
      opencv
      onnxruntime
      transformers
      detectron2
      paddleocr
      # yolox (not included; the tests that need it are disabled below)
    ]
    # Also pull in layoutparser's optional dependency groups.
    ++ layoutparser.optional-dependencies.layoutmodels
    ++ layoutparser.optional-dependencies.tesseract;

  # `huggingface-hub` is intentionally not repeated here: it is already part
  # of `propagatedBuildInputs`.
  nativeCheckInputs = [
    pytestCheckHook
    coverage
    click
    httpx
    mypy
    pytest-cov
    pdf2image
  ];

  # Point HOME at a fresh temporary directory for the test run.
  # NOTE(review): presumably because something in the test suite writes under
  # $HOME -- confirm.
  preCheck = ''
    export HOME=$(mktemp -d)
  '';

  disabledTests = [
    # Failure cause not yet investigated.
    "test_get_path_oob_move_deeply_nested"
    "test_get_path_oob_move_nested[False]"
    # Requires yolox, which is not among the dependencies above.
    "test_yolox"
  ];

  disabledTestPaths = [
    # Require network access.
    "test_unstructured_inference/inference/test_layout.py"
    "test_unstructured_inference/models/test_chippermodel.py"
    "test_unstructured_inference/models/test_detectron2.py"
    "test_unstructured_inference/models/test_detectron2onnx.py"
    # Failure cause unclear.
    "test_unstructured_inference/models/test_donut.py"
    "test_unstructured_inference/models/test_model.py"
    "test_unstructured_inference/models/test_tables.py"
  ];

  pythonImportsCheck = [ "unstructured_inference" ];

  meta = {
    description = "Hosted model inference code for layout parsing models";
    homepage = "https://github.com/Unstructured-IO/unstructured-inference";
    changelog = "https://github.com/Unstructured-IO/unstructured-inference/blob/${src.rev}/CHANGELOG.md";
    license = lib.licenses.asl20;
    maintainers = with lib.maintainers; [ happysalada ];
    platforms = [
      "x86_64-linux"
      "x86_64-darwin"
      "aarch64-darwin"
    ];
  };
}