# Nix package definition for pyocr, a Python wrapper for the Tesseract and
# Cuneiform OCR engines.
#
# The OCR tools are not taken from the environment at run time; instead the
# sources are patched (./paths.patch) and the placeholders are then filled in
# with absolute store paths of the `tesseract` and `cuneiform` arguments.
{ lib, fetchFromGitHub, buildPythonPackage, pillow, six
, tesseract, cuneiform
}:

buildPythonPackage rec {
  pname = "pyocr";
  version = "0.4.7";
  name = pname + "-" + version;

  # Don't fetch from PyPI because it doesn't contain tests.
  src = fetchFromGitHub {
    owner = "jflesch";
    repo = "pyocr";
    rev = version;
    sha256 = "1iw73r8yrgjf8g00yzpz62ymqbf89cqhyhl9g430srmsrq7mn2yd";
  };

  # These attributes become environment variables in the builder, which is
  # where `substituteInPlace --subst-var NAME` in postPatch reads them from.
  NIX_CUNEIFORM_CMD = "${cuneiform}/bin/cuneiform";
  NIX_CUNEIFORM_DATA = "${cuneiform}/share/cuneiform";
  NIX_LIBTESSERACT_PATH = "${tesseract}/lib/libtesseract.so";
  NIX_TESSDATA_PREFIX = "${tesseract}/share/tessdata";
  NIX_TESSERACT_CMD = "${tesseract}/bin/tesseract";

  # NOTE(review): presumably this patch introduces the @NIX_...@ placeholders
  # replaced in postPatch; the patch is not part of this file, so verify.
  patches = [ ./paths.patch ];

  postPatch = ''
    # Replace each @NAME@ placeholder with the value of the variable NAME.
    # Both the Cuneiform command and its data directory are substituted
    # (previously NIX_CUNEIFORM_CMD was listed twice and NIX_CUNEIFORM_DATA
    # was never substituted).
    substituteInPlace src/pyocr/cuneiform.py \
      --subst-var NIX_CUNEIFORM_CMD \
      --subst-var NIX_CUNEIFORM_DATA

    substituteInPlace src/pyocr/tesseract.py \
      --subst-var NIX_TESSERACT_CMD

    substituteInPlace src/pyocr/libtesseract/tesseract_raw.py \
      --subst-var NIX_TESSDATA_PREFIX \
      --subst-var NIX_LIBTESSERACT_PATH

    # Disable specific tests that are probably failing because of this issue:
    # https://github.com/jflesch/pyocr/issues/52
    #
    # Every entry has the form "<module>:<Class>.<method>". For each one, a
    # line "<Class>.<method> = unittest.skip(<Class>.<method>)" is appended
    # to tests/tests_<module>.py.
    for test in $disabledTests; do
      file="''${test%%:*}"
      fun="''${test#*:}"
      echo "$fun = unittest.skip($fun)" >> "tests/tests_$file.py"
    done
  '';

  # Consumed by the loop in postPatch; format is "<module>:<Class>.<method>".
  disabledTests = [
    "cuneiform:TestTxt.test_basic"
    "cuneiform:TestTxt.test_european"
    "cuneiform:TestTxt.test_french"
    "cuneiform:TestWordBox.test_basic"
    "cuneiform:TestWordBox.test_european"
    "cuneiform:TestWordBox.test_french"
    "libtesseract:TestBasicDoc.test_basic"
    "libtesseract:TestDigitLineBox.test_digits"
    "libtesseract:TestLineBox.test_japanese"
    "libtesseract:TestTxt.test_japanese"
    "libtesseract:TestWordBox.test_japanese"
    "tesseract:TestDigitLineBox.test_digits"
    "tesseract:TestTxt.test_japanese"
  ];

  propagatedBuildInputs = [ pillow six ];

  meta = {
    homepage = "https://github.com/jflesch/pyocr";
    description = "A Python wrapper for Tesseract and Cuneiform";
    license = lib.licenses.gpl3Plus;
  };
}