nixpkgs mirror (for testing)
github.com/NixOS/nixpkgs
nix
# Package expression for PaddleOCR as a Python module.
#
# The source is fetched from the upstream GitHub repository at the tag
# matching `version` and built as a pyproject package with setuptools.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  setuptools,
  setuptools-scm,
  attrdict,
  beautifulsoup4,
  cython,
  fire,
  fonttools,
  lmdb,
  lxml,
  numpy,
  opencv-python,
  openpyxl,
  pdf2docx,
  pillow,
  pyclipper,
  pymupdf,
  python-docx,
  rapidfuzz,
  scikit-image,
  shapely,
  tqdm,
  paddlepaddle,
  lanms-neo,
  polygon3,
  paddlex,
  pyyaml,
}:

buildPythonPackage rec {
  pname = "paddleocr";
  version = "3.3.3";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "PaddlePaddle";
    repo = "PaddleOCR";
    tag = "v${version}";
    hash = "sha256-D+ubKviK6Nx+PlvBkTvCfKomZ9og0YrdNZNxMqWlPik=";
  };

  patches = [
    # The `ppocr.data.imaug` module re-exports the `IaaAugment` and `CopyPaste`
    # classes. These classes depend on the `imgaug` package, which is
    # unmaintained and has been removed from nixpkgs.
    #
    # The image OCR feature of PaddleOCR doesn't use these classes though, so
    # it works even after stripping the `IaaAugment` and `CopyPaste`
    # exports. It probably breaks some of the OCR model creation tooling that
    # PaddleOCR provides, however.
    ./remove-import-imaug.patch
  ];

  # Upstream pins an exact setuptools version in pyproject.toml; drop the pin
  # so the build accepts the setuptools listed in `build-system` below.
  # `--replace-fail` aborts the build if the pinned string is no longer
  # present, so a stale substitution is noticed on version bumps.
  postPatch = ''
    substituteInPlace pyproject.toml \
      --replace-fail "setuptools==72.1.0" "setuptools"
  '';

  build-system = [
    setuptools
    setuptools-scm
  ];

  dependencies = [
    attrdict
    beautifulsoup4
    cython
    fire
    fonttools
    lmdb
    lxml
    numpy
    opencv-python
    openpyxl
    pdf2docx
    pillow
    pyclipper
    pymupdf
    python-docx
    rapidfuzz
    scikit-image
    shapely
    tqdm
    paddlepaddle
    lanms-neo
    polygon3
    paddlex
    pyyaml
  ];

  # TODO: The tests depend, among possibly other things, on `cudatoolkit`.
  # But Cudatoolkit fails to install.
  # preCheck = "export HOME=$TMPDIR";
  # nativeCheckInputs = with pkgs; [ which cudatoolkit ];
  doCheck = false;

  meta = {
    homepage = "https://github.com/PaddlePaddle/PaddleOCR";
    license = lib.licenses.asl20;
    description = "Multilingual OCR toolkits based on PaddlePaddle";
    longDescription = ''
      PaddleOCR aims to create multilingual, awesome, leading, and practical OCR
      tools that help users train better models and apply them into practice.
    '';
    changelog = "https://github.com/PaddlePaddle/PaddleOCR/releases/tag/${src.tag}";
    maintainers = with lib.maintainers; [ happysalada ];
    platforms = [
      "x86_64-linux"
      "x86_64-darwin"
      "aarch64-darwin"
    ];
  };
}