nixpkgs mirror (for testing)
github.com/NixOS/nixpkgs
nix
{
  lib,
  stdenv,
  config,
  buildPythonPackage,
  fetchFromGitHub,

  fetchzip,
  replaceVars,

  # build-system
  setuptools,

  # dependencies
  pyclipper,
  opencv-python,
  numpy,
  six,
  shapely,
  pyyaml,
  pillow,
  onnxruntime,
  tqdm,

  # tests
  pytestCheckHook,
  requests,

  cudaSupport ? config.cudaSupport,
  # This very package, as seen from the package set; only used to derive passthru.gpuCheck.
  rapidocr-onnxruntime,
}:
let
  # Model files are published as a separate archive on upstream's release page.
  # The top-level directory is kept (stripRoot = false), hence the prefix in `models`.
  modelArchive = fetchzip {
    url = "https://github.com/RapidAI/RapidOCR/releases/download/v1.1.0/required_for_whl_v1.3.0.zip";
    hash = "sha256-j/0nzyvu/HfNTt5EZ+2Phe5dkyPOdQw/OZTz0yS63aA=";
    stripRoot = false;
  };

  # Directory whose contents get symlinked into the source tree in postPatch.
  models = "${modelArchive}/required_for_whl_v1.3.0/resources/models";

  inherit (stdenv) hostPlatform;

  # True on every platform except aarch64-linux; gates the import check and the test suite.
  notAarch64Linux = !hostPlatform.isLinux || !hostPlatform.isAarch64;
in
buildPythonPackage (finalAttrs: {
  pname = "rapidocr-onnxruntime";
  version = "1.4.4";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "RapidAI";
    repo = "RapidOCR";
    tag = "v${finalAttrs.version}";
    hash = "sha256-x0VELDKOffxbV3v0aDFJFuDC4YfsGM548XWgINmRc3M=";
  };

  # The Python sources live in the python/ subdirectory of the repository.
  sourceRoot = "${finalAttrs.src.name}/python";

  # HACK:
  # Upstream organizes its packages in a very unconventional layout, so the hooks below
  # rearrange the tree until the standard build infrastructure can cope with it.
  # The "intended" way of building this package is shown in
  # https://github.com/RapidAI/RapidOCR/blob/02829ef986bc2a5c4f33e9c45c9267bcf2d07a1d/.github/workflows/gen_whl_to_pypi_rapidocr_ort.yml#L80-L92

  # Upstream's setup.py derives the version by querying PyPI for the latest release and
  # bumping that number. There is no network access inside the Nix build environment,
  # so the patch removes that lookup and the version is substituted in from here instead.
  patches = [
    (replaceVars ./setup-py-override-version-checking.patch {
      inherit (finalAttrs) version;
    })
  ];

  # Select the onnxruntime flavour of setup.py, link in the model files and
  # write a top-level __init__.py re-exporting the public entry points.
  postPatch = ''
    mv setup_onnxruntime.py setup.py

    ln -s ${models}/* rapidocr_onnxruntime/models

    echo "from .rapidocr_onnxruntime.main import RapidOCR, VisRes" > __init__.py
  '';

  # For the wheel to build correctly, upstream expects the sources under
  # rapidocr_onnxruntime/rapidocr_onnxruntime rather than rapidocr_onnxruntime,
  # so the directory is nested one level deeper (via a temporary name).
  preBuild = ''
    mkdir rapidocr_onnxruntime_t
    mv rapidocr_onnxruntime rapidocr_onnxruntime_t
    mv rapidocr_onnxruntime_t rapidocr_onnxruntime
  '';

  # Undo the nesting performed in preBuild.
  postBuild = ''
    mv rapidocr_onnxruntime rapidocr_onnxruntime_t
    mv rapidocr_onnxruntime_t/* .
  '';

  build-system = [ setuptools ];

  dependencies = [
    pyclipper
    opencv-python
    numpy
    six
    shapely
    pyyaml
    pillow
    onnxruntime
    tqdm
  ];

  # On aarch64-linux the cpuinfo test fails because /sys/devices/system/cpu/
  # does not exist in the sandbox:
  #   terminate called after throwing an instance of 'onnxruntime::OnnxRuntimeException'
  #
  # -> Skip the import check of rapidocr_onnxruntime on that platform.
  pythonImportsCheck = lib.optionals notAarch64Linux [
    "rapidocr_onnxruntime"
  ];

  nativeCheckInputs = [
    pytestCheckHook
    requests
  ];

  # Test files that exercise other backends (OpenVINO, Paddle), not onnxruntime.
  disabledTestPaths = [
    "tests/test_vino.py"
    "tests/test_paddle.py"
  ];

  disabledTests = [
    # Needs Internet access
    "test_long_img"
  ];

  doCheck =
    # With CUDA support enabled the tests need access to a physical GPU,
    # otherwise the interpreter crashes:
    #   Fatal Python error: Aborted
    #   File "/nix/store/..onnxruntime/capi/onnxruntime_inference_collection.py", line 561 in _create_inference_session
    !cudaSupport
    # Same 'onnxruntime::OnnxRuntimeException' as described at pythonImportsCheck.
    && notAarch64Linux;

  passthru = {
    # Upstream renamed rapidocr-onnxruntime to rapidocr as of 2.0.0. Some packages,
    # such as open-webui, still require rapidocr-onnxruntime 1.4.4, so automatic
    # updates are disabled for this package.
    # nixpkgs-update: no auto update
    skipBulkUpdate = true;

    # Variant of this package that always runs the test suite and requests a
    # builder advertising the "cuda" system feature.
    gpuCheck = rapidocr-onnxruntime.overridePythonAttrs (prev: {
      requiredSystemFeatures = [ "cuda" ];
      doCheck = true;

      disabledTests =
        (prev.disabledTests or [ ])
        ++ lib.optionals cudaSupport [
          # IndexError: list index out of range
          "test_ort_cuda_warning"
        ];
    });
  };

  meta = {
    changelog = "https://github.com/RapidAI/RapidOCR/releases/tag/${finalAttrs.src.tag}";
    description = "Cross platform OCR Library based on OnnxRuntime";
    homepage = "https://github.com/RapidAI/RapidOCR";
    license = lib.licenses.asl20;
    maintainers = [ lib.maintainers.wrvsrx ];
    mainProgram = "rapidocr_onnxruntime";
  };
})