1{
2 lib,
3 buildPythonPackage,
4 fetchFromGitHub,
5
6 fetchzip,
7 replaceVars,
8
9 setuptools,
10 pyclipper,
11 opencv-python,
12 numpy,
13 six,
14 shapely,
15 pyyaml,
16 pillow,
17 onnxruntime,
18 tqdm,
19
20 pytestCheckHook,
21 requests,
22}:
let
  # Upstream release being packaged; the Git tag fetched below is derived from it.
  version = "1.4.4";

  # Directory holding the model files inside the unpacked release archive.
  # The archive is extracted with its top-level directory kept (stripRoot = false),
  # so the path has to descend through it explicitly.
  models = "${modelsArchive}/required_for_whl_v1.3.0/resources/models";

  # Release asset that carries the model files.
  modelsArchive = fetchzip {
    url = "https://github.com/RapidAI/RapidOCR/releases/download/v1.1.0/required_for_whl_v1.3.0.zip";
    stripRoot = false;
    hash = "sha256-j/0nzyvu/HfNTt5EZ+2Phe5dkyPOdQw/OZTz0yS63aA=";
  };

  # Source tree of the RapidOCR repository at the tagged release.
  src = fetchFromGitHub {
    owner = "RapidAI";
    repo = "RapidOCR";
    tag = "v${version}";
    hash = "sha256-x0VELDKOffxbV3v0aDFJFuDC4YfsGM548XWgINmRc3M=";
  };
in
buildPythonPackage {
  pname = "rapidocr-onnxruntime";
  inherit src version;
  pyproject = true;

  # Build from the python/ subdirectory of the fetched repository.
  sourceRoot = src.name + "/python";

  build-system = [ setuptools ];

  dependencies = [
    pyclipper
    opencv-python
    numpy
    six
    shapely
    pyyaml
    pillow
    onnxruntime
    tqdm
  ];

  # HACK:
  # The upstream repository lays out its packages in a very unusual way, so the standard
  # build infrastructure has to be nudged into cooperating with it. The workflow at
  # https://github.com/RapidAI/RapidOCR/blob/02829ef986bc2a5c4f33e9c45c9267bcf2d07a1d/.github/workflows/gen_whl_to_pypi_rapidocr_ort.yml#L80-L92
  # shows how upstream itself builds the package; the hooks below imitate it.

  # Upstream's setup.py works out its version by asking PyPI for the newest published
  # release and bumping that number. The Nix build environment has no internet access,
  # so the patch removes that lookup and substitutes the version of this derivation.
  patches = [
    (replaceVars ./setup-py-override-version-checking.patch {
      inherit version;
    })
  ];

  # Put the backend-specific setup script in place as setup.py, link the model files
  # into the package directory, and write the top-level __init__.py.
  postPatch = ''
    mv setup_onnxruntime.py setup.py
    mkdir -p rapidocr_onnxruntime/models

    ln -s ${models}/* rapidocr_onnxruntime/models

    # Magic patch from upstream - what does this even do??
    echo "from .rapidocr_onnxruntime.main import RapidOCR, VisRes" > __init__.py
  '';

  # For the wheel to come out right, upstream wants the sources to sit in
  # rapidocr_onnxruntime/rapidocr_onnxruntime rather than in rapidocr_onnxruntime,
  # so the directory is nested one level deeper for the duration of the build.
  preBuild = ''
    mkdir rapidocr_onnxruntime_t
    mv rapidocr_onnxruntime rapidocr_onnxruntime_t
    mv rapidocr_onnxruntime_t rapidocr_onnxruntime
  '';

  # Undo the nesting from preBuild once the wheel has been built.
  postBuild = ''
    mv rapidocr_onnxruntime rapidocr_onnxruntime_t
    mv rapidocr_onnxruntime_t/* .
  '';

  nativeCheckInputs = [
    pytestCheckHook
    requests
  ];

  pythonImportsCheck = [ "rapidocr_onnxruntime" ];

  # These test files target other backends, not the ONNX Runtime one.
  disabledTestPaths = [
    "tests/test_vino.py"
    "tests/test_paddle.py"
  ];

  disabledTests = [
    # Requires network access, which the build environment does not provide
    "test_long_img"
  ];

  meta = {
    description = "Cross platform OCR Library based on OnnxRuntime";
    homepage = "https://github.com/RapidAI/RapidOCR";
    changelog = "https://github.com/RapidAI/RapidOCR/releases/tag/${src.tag}";
    license = [ lib.licenses.asl20 ];
    maintainers = [ lib.maintainers.pluiedev ];
    mainProgram = "rapidocr_onnxruntime";
    # Appears to be connected to https://github.com/microsoft/onnxruntime/issues/10038
    # A related discussion: https://github.com/NixOS/nixpkgs/pull/319053#issuecomment-2167713362
    badPlatforms = [ "aarch64-linux" ];
  };
}