# Package definition for unstructured-inference, built with buildPythonPackage
# from the upstream GitHub tag matching `version`.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  # runtime dependencies
  layoutparser,
  python-multipart,
  huggingface-hub,
  opencv-python,
  onnxruntime,
  transformers,
  # NOTE: detectron2 and paddleocr are accepted as arguments but are currently
  # not used below (see the commented-out entries in propagatedBuildInputs).
  detectron2,
  paddleocr,
  # check inputs
  pytestCheckHook,
  coverage,
  click,
  httpx,
  mypy,
  pytest-cov-stub,
  pdf2image,
}:

buildPythonPackage rec {
  pname = "unstructured-inference";
  version = "0.8.10";
  format = "setuptools";

  src = fetchFromGitHub {
    owner = "Unstructured-IO";
    repo = "unstructured-inference";
    tag = version;
    hash = "sha256-c5brGGY1PPuKYUb1UOvyKFZC4ph0pGRagHuTLKPXErY=";
  };

  # Runtime dependencies, plus the "layoutmodels" and "tesseract" optional
  # dependency sets exposed by layoutparser.
  propagatedBuildInputs =
    [
      layoutparser
      python-multipart
      huggingface-hub
      opencv-python
      onnxruntime
      transformers
      # detectron2 # fails to build
      # paddleocr # 3.12 not yet supported
      # yolox
    ]
    ++ layoutparser.optional-dependencies.layoutmodels
    ++ layoutparser.optional-dependencies.tesseract;

  nativeCheckInputs = [
    pytestCheckHook
    coverage
    click
    httpx
    mypy
    pytest-cov-stub
    pdf2image
    huggingface-hub
  ];

  # The test suite is disabled: a dependency needs to be updated properly
  # before it can be re-enabled.
  # NOTE(review): while doCheck is false, the preCheck / disabledTests /
  # disabledTestPaths settings below are presumably not exercised; they are
  # kept so the suite can be switched back on later - confirm when re-enabling.
  doCheck = false;

  # Point HOME at a fresh temporary directory for the check phase.
  preCheck = ''
    export HOME=$(mktemp -d)
  '';

  disabledTests = [
    # not sure why this fails
    "test_get_path_oob_move_deeply_nested"
    "test_get_path_oob_move_nested[False]"
    # requires yolox
    "test_yolox"
  ];

  disabledTestPaths = [
    # network access
    "test_unstructured_inference/inference/test_layout.py"
    "test_unstructured_inference/models/test_chippermodel.py"
    "test_unstructured_inference/models/test_detectron2onnx.py"
    # unclear failure
    "test_unstructured_inference/models/test_donut.py"
    "test_unstructured_inference/models/test_model.py"
    "test_unstructured_inference/models/test_tables.py"
  ];

  pythonImportsCheck = [ "unstructured_inference" ];

  # `lib` is referenced explicitly instead of `with lib;` so that names used in
  # this attribute set cannot be silently captured from the whole of `lib`.
  meta = {
    description = "Hosted model inference code for layout parsing models";
    homepage = "https://github.com/Unstructured-IO/unstructured-inference";
    changelog = "https://github.com/Unstructured-IO/unstructured-inference/blob/${src.tag}/CHANGELOG.md";
    license = lib.licenses.asl20;
    maintainers = with lib.maintainers; [ happysalada ];
    platforms = [
      "x86_64-linux"
      "x86_64-darwin"
      "aarch64-darwin"
    ];
  };
}