{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  attrdict,
  beautifulsoup4,
  cython,
  fire,
  fonttools,
  lmdb,
  lxml,
  numpy,
  opencv-python,
  openpyxl,
  pdf2docx,
  pillow,
  pyclipper,
  pymupdf,
  python-docx,
  rapidfuzz,
  scikit-image,
  shapely,
  tqdm,
  paddlepaddle,
  lanms-neo,
  polygon3,
}:

let
  version = "2.9.1";
in
buildPythonPackage rec {
  pname = "paddleocr";
  inherit version;
  format = "setuptools";

  src = fetchFromGitHub {
    owner = "PaddlePaddle";
    repo = "PaddleOCR";
    tag = "v${version}";
    hash = "sha256-QCddxgVdLaAJLfKCy+tnQsxownfl1Uv0TXhFRiFi9cY=";
  };

  patches = [
    # The `ppocr.data.imaug` module re-exports the `IaaAugment` and `CopyPaste`
    # classes. These classes depend on the `imgaug` package, which is
    # unmaintained and has been removed from nixpkgs.
    #
    # The image OCR feature of PaddleOCR doesn't use these classes though, so
    # it keeps working even after stripping the `IaaAugment` and `CopyPaste`
    # exports. It probably breaks some of the OCR model creation tooling that
    # PaddleOCR provides, however.
    ./remove-import-imaug.patch
  ];

  # All version constraints are relaxed on purpose: trying to relax only
  # pymupdf makes the whole build fail.
  pythonRelaxDeps = true;
  # Requirements dropped from the package metadata; `imgaug` is the package
  # whose imports the patch above removes.
  pythonRemoveDeps = [
    "imgaug"
    "visualdl"
    "opencv-contrib-python"
  ];

  propagatedBuildInputs = [
    attrdict
    beautifulsoup4
    cython
    fire
    fonttools
    lmdb
    lxml
    numpy
    opencv-python
    openpyxl
    pdf2docx
    pillow
    pyclipper
    pymupdf
    python-docx
    rapidfuzz
    scikit-image
    shapely
    tqdm
    paddlepaddle
    lanms-neo
    polygon3
  ];

  # TODO: The tests depend, among possibly other things, on `cudatoolkit`,
  # but cudatoolkit fails to install, so the check phase stays disabled.
  # Re-enabling it would need something like the lines below, with `which`
  # and `cudatoolkit` added to the function arguments at the top of the file.
  # preCheck = "export HOME=$TMPDIR";
  # nativeCheckInputs = [ which cudatoolkit ];
  doCheck = false;

  meta = {
    homepage = "https://github.com/PaddlePaddle/PaddleOCR";
    license = lib.licenses.asl20;
    description = "Multilingual OCR toolkits based on PaddlePaddle";
    longDescription = ''
      PaddleOCR aims to create multilingual, awesome, leading, and practical OCR
      tools that help users train better models and apply them into practice.
    '';
    # `src.tag` resolves through `rec` to the `tag` attribute of `src` above.
    changelog = "https://github.com/PaddlePaddle/PaddleOCR/releases/tag/${src.tag}";
    maintainers = [ lib.maintainers.happysalada ];
    platforms = [
      "x86_64-linux"
      "x86_64-darwin"
      "aarch64-darwin"
    ];
  };
}