1{
2 lib,
3 buildPythonPackage,
4 fetchFromGitHub,
5 # build inputs
6 numpy,
7 opencv4,
8 scipy,
9 pandas,
10 pillow,
11 pyyaml,
12 iopath,
13 pdfplumber,
14 pdf2image,
15 google-cloud-vision,
16 pytesseract,
17 torch,
18 torchvision,
19 effdet,
20 # check inputs
21 pytestCheckHook,
22}:
let
  pname = "layoutparser";
  version = "0.3.4";

  # Extras exposed to consumers through passthru. Several extras share the
  # same package list, so each list is written once and reused.
  optional-dependencies =
    let
      # Local names deliberately differ from the function arguments so that
      # `effdet` below still refers to the package, not to this binding.
      gcvPkgs = [ google-cloud-vision ];
      tesseractPkgs = [ pytesseract ];
      effdetPkgs = [
        torch
        torchvision
        effdet
      ];
    in
    {
      # Both OCR backends together.
      ocr = gcvPkgs ++ tesseractPkgs;
      gcv = gcvPkgs;
      tesseract = tesseractPkgs;
      # The layout-model extra and the effdet extra resolve to the same set.
      layoutmodels = effdetPkgs;
      effdet = effdetPkgs;
      # paddledetection = [ paddlepaddle ]
    };
in
# Builds layoutparser from the tagged upstream GitHub source using the
# setuptools build format.
buildPythonPackage {
  inherit pname version;
  format = "setuptools";

  src = fetchFromGitHub {
    owner = "Layout-Parser";
    repo = "layout-parser";
    rev = "refs/tags/v${version}";
    hash = "sha256-qBzcIUmgnGy/Xn/B+7UrLrRhCvCkapL+ymqGS2sMVgA=";
  };

  # Rewrite the "opencv-python" requirement in setup.py to "opencv"
  # (presumably the distribution name the opencv4 package advertises -- confirm).
  # --replace-fail is used instead of the deprecated --replace so that the
  # build aborts if upstream ever changes the string and this patch goes stale.
  postPatch = ''
    substituteInPlace setup.py \
      --replace-fail "opencv-python" "opencv"
  '';

  # Runtime dependencies propagated to consumers of this package.
  propagatedBuildInputs = [
    numpy
    opencv4
    scipy
    pandas
    pillow
    pyyaml
    iopath
    pdfplumber
    pdf2image
  ];

  pythonImportsCheck = [ "layoutparser" ];

  # The test suite is run with the OCR extra available.
  nativeCheckInputs = [ pytestCheckHook ] ++ optional-dependencies.ocr;

  disabledTests = [
    "test_PaddleDetectionModel" # requires paddlepaddle not yet packaged
    # requires detectron2 not yet packaged
    "test_Detectron2Model"
    "test_AutoModel"
    # requires effdet (disable for now until effdet builds on darwin)
    "test_EffDetModel"
    # problems with google-cloud-vision
    # AttributeError: module 'google.cloud.vision' has no attribute 'types'
    "test_gcv_agent"
    "test_viz"
    # - Failed: DID NOT RAISE <class 'ImportError'>
    "test_when_backends_are_not_loaded"
  ];

  disabledTestPaths = [
    "tests_deps/test_only_detectron2.py" # requires detectron2 not yet packaged
    "tests_deps/test_only_effdet.py" # requires effdet (disable for now until effdet builds on darwin)
    "tests_deps/test_only_paddledetection.py" # requires paddlepaddle not yet packaged
  ];

  # Expose the extras so dependants can select e.g. optional-dependencies.ocr.
  passthru.optional-dependencies = optional-dependencies;

  # Explicit `lib.` references instead of a scope-wide `with lib;`, so every
  # name in this set has an obvious origin.
  meta = {
    description = "Unified toolkit for Deep Learning Based Document Image Analysis";
    homepage = "https://github.com/Layout-Parser/layout-parser";
    changelog = "https://github.com/Layout-Parser/layout-parser/releases/tag/v${version}";
    license = lib.licenses.asl20;
    maintainers = with lib.maintainers; [ happysalada ];
  };
}