# Package definition for tesserocr: fetches the sdist from PyPI, applies one
# upstream test fix plus a small header tweak, and builds it with
# setuptools/Cython against leptonica and tesseract4.
{
  buildPythonPackage,
  fetchPypi,
  fetchpatch,
  lib,

  # build-system
  cython,
  pkg-config,
  setuptools,

  # native dependencies
  leptonica,
  tesseract4,

  # dependencies
  pillow,

  # tests
  unittestCheckHook,
}:

buildPythonPackage rec {
  pname = "tesserocr";
  version = "2.8.0";
  format = "setuptools";

  # Source distribution from PyPI, pinned by content hash.
  src = fetchPypi {
    inherit pname version;
    hash = "sha256-vlGNGxtf9UwRqtoeD9EpQlCepwWB4KizmipHOgstvTY=";
  };

  patches = [
    # Fix a broken test. The issue has been reported upstream at
    # https://github.com/sirfz/tesserocr/issues/363
    # Check the status of the issue before removing this patch at the next
    # update.
    (fetchpatch {
      url = "https://github.com/sirfz/tesserocr/commit/78d9e8187bd4d282d572bd5221db2c69e560e017.patch";
      hash = "sha256-s51s9EIV9AZT6UoqwTuQ8lOjToqwIIUkDLjsvCsyYFU=";
    })
  ];

  # https://github.com/sirfz/tesserocr/issues/314
  # After each line of tesseract.pxd that mentions allheaders.h, append a
  # `pass` statement followed by a new
  # `cdef extern from "leptonica/pix_internal.h"` header.
  # NOTE(review): presumably this rebinds the declarations that follow to
  # pix_internal.h; see the linked issue and re-check on the next update.
  postPatch = ''
    sed -i '/allheaders.h/a\ pass\n\ncdef extern from "leptonica/pix_internal.h" nogil:' tesserocr/tesseract.pxd
  '';

  build-system = [
    cython
    pkg-config
    setuptools
  ];

  # Native libraries the extension is built against.
  buildInputs = [
    leptonica
    tesseract4
  ];

  dependencies = [ pillow ];

  pythonImportsCheck = [ "tesserocr" ];

  nativeCheckInputs = [ unittestCheckHook ];

  # Remove the in-tree `tesserocr` directory before the tests run.
  # NOTE(review): presumably so the tests import the installed, compiled
  # module instead of the unbuilt sources; confirm.
  preCheck = ''
    rm -rf tesserocr
  '';

  meta = {
    changelog = "https://github.com/sirfz/tesserocr/releases/tag/v${version}";
    description = "Simple, Pillow-friendly, wrapper around the tesseract-ocr API for Optical Character Recognition (OCR)";
    homepage = "https://github.com/sirfz/tesserocr";
    license = lib.licenses.mit;
    maintainers = with lib.maintainers; [ mtrsk ];
    platforms = lib.platforms.unix;
  };
}