at 22.05-pre 15 kB view raw
1diff --git a/src/pyocr/cuneiform.py b/src/pyocr/cuneiform.py 2index 2e5b717..35647e2 100644 3--- a/src/pyocr/cuneiform.py 4+++ b/src/pyocr/cuneiform.py 5@@ -25,13 +25,9 @@ from . import builders 6 from .error import CuneiformError 7 8 9-# CHANGE THIS IF CUNEIFORM IS NOT IN YOUR PATH, OR IS NAMED DIFFERENTLY 10-CUNEIFORM_CMD = 'cuneiform' 11+CUNEIFORM_CMD = '@cuneiform@/bin/cuneiform' 12 13-CUNEIFORM_DATA_POSSIBLE_PATHS = [ 14- "/usr/local/share/cuneiform", 15- "/usr/share/cuneiform", 16-] 17+CUNEIFORM_DATA_POSSIBLE_PATHS = ['@cuneiform@/share/cuneiform'] 18 19 LANGUAGES_LINE_PREFIX = "Supported languages: " 20 LANGUAGES_SPLIT_RE = re.compile("[^a-z]") 21diff --git a/src/pyocr/libtesseract/tesseract_raw.py b/src/pyocr/libtesseract/tesseract_raw.py 22index a068e73..9ebea5c 100644 23--- a/src/pyocr/libtesseract/tesseract_raw.py 24+++ b/src/pyocr/libtesseract/tesseract_raw.py 25@@ -2,7 +2,6 @@ import ctypes 26 import locale 27 import logging 28 import os 29-import sys 30 31 from ..error import TesseractError 32 33@@ -10,48 +9,16 @@ from ..error import TesseractError 34 logger = logging.getLogger(__name__) 35 36 TESSDATA_PREFIX = os.getenv('TESSDATA_PREFIX', None) 37-libnames = [] 38+if TESSDATA_PREFIX is None: 39+ TESSDATA_PREFIX = '@tesseract@/share/tessdata' 40+ os.environ['TESSDATA_PREFIX'] = TESSDATA_PREFIX 41+ 42+ 43 # 70 is the minimum credible dpi for tesseract and force it to compute an 44 # estimate of the image dpi 45 DPI_DEFAULT = 70 46 47- 48-if getattr(sys, 'frozen', False): # pragma: no cover 49- # Pyinstaller integration 50- libnames += [os.path.join(sys._MEIPASS, "libtesseract-4.dll")] 51- libnames += [os.path.join(sys._MEIPASS, "libtesseract-3.dll")] 52- tessdata = os.path.join(sys._MEIPASS, "data") 53- if not os.path.exists(os.path.join(tessdata, "tessdata")): 54- logger.warning( 55- "Running from container, but no tessdata ({}) found !".format( 56- tessdata 57- ) 58- ) 59- else: 60- TESSDATA_PREFIX = tessdata 61- 62- 63-if sys.platform[:3] == "win": # pragma: no cover 64- libnames += [ 65- # Jflesch> Don't they have the equivalent of LD_LIBRARY_PATH on 66- # Windows ? 67- "../vs2010/DLL_Release/libtesseract302.dll", 68- # prefer the most recent first 69- "libtesseract305.dll", 70- "libtesseract304.dll", 71- "libtesseract303.dll", 72- "libtesseract302.dll", 73- "libtesseract400.dll", # Tesseract 4 is still in alpha stage 74- "libtesseract.dll", 75- "C:\\Program Files (x86)\\Tesseract-OCR\\libtesseract-4.dll", 76- "C:\\Program Files (x86)\\Tesseract-OCR\\libtesseract-3.dll", 77- ] 78-else: 79- libnames += [ 80- "libtesseract.so.4", 81- "libtesseract.so.3", 82- ] 83- 84+libnames = [ "@tesseract@/lib/libtesseract.so" ] 85 86 g_libtesseract = None 87 88@@ -364,12 +331,12 @@ def init(lang=None): 89 try: 90 if lang: 91 lang = lang.encode("utf-8") 92- prefix = None 93- if TESSDATA_PREFIX: # pragma: no cover 94- prefix = TESSDATA_PREFIX.encode("utf-8") 95+ 96+ prefix = TESSDATA_PREFIX 97+ 98 g_libtesseract.TessBaseAPIInit3( 99 ctypes.c_void_p(handle), 100- ctypes.c_char_p(prefix), 101+ ctypes.c_char_p(prefix.encode('utf-8')), 102 ctypes.c_char_p(lang) 103 ) 104 g_libtesseract.TessBaseAPISetVariable( 105diff --git a/src/pyocr/tesseract.py b/src/pyocr/tesseract.py 106index 7c30852..44e8446 100644 107--- a/src/pyocr/tesseract.py 108+++ b/src/pyocr/tesseract.py 109@@ -28,8 +28,7 @@ from .builders import DigitBuilder # backward compatibility 110 from .error import TesseractError # backward compatibility 111 from .util import digits_only 112 113-# CHANGE THIS IF TESSERACT IS NOT IN YOUR PATH, OR IS NAMED DIFFERENTLY 114-TESSERACT_CMD = 'tesseract.exe' if os.name == 'nt' else 'tesseract' 115+TESSERACT_CMD = '@tesseract@/bin/tesseract' 116 117 TESSDATA_EXTENSION = ".traineddata" 118 119diff --git a/tests/tests_cuneiform.py b/tests/tests_cuneiform.py 120index 45b7f6a..95f55c6 100644 121--- a/tests/tests_cuneiform.py 122+++ b/tests/tests_cuneiform.py 123@@ -21,7 +21,7 @@ class TestCuneiform(BaseTest): 124 # XXX is it useful? 125 which.return_value = True 126 self.assertTrue(cuneiform.is_available()) 127- which.assert_called_once_with("cuneiform") 128+ which.assert_called_once_with("@cuneiform@/bin/cuneiform") 129 130 @patch("subprocess.Popen") 131 def test_version(self, popen): 132@@ -54,7 +54,7 @@ class TestCuneiform(BaseTest): 133 self.assertIn("eng", langs) 134 self.assertIn("fra", langs) 135 popen.assert_called_once_with( 136- ["cuneiform", "-l"], 137+ ["@cuneiform@/bin/cuneiform", "-l"], 138 stdout=subprocess.PIPE, stderr=subprocess.STDOUT 139 ) 140 141@@ -109,7 +109,7 @@ class TestCuneiformTxt(BaseTest): 142 output = cuneiform.image_to_string(self.image) 143 self.assertEqual(output, self._get_file_content("text").strip()) 144 popen.assert_called_once_with( 145- ["cuneiform", "-f", "text", "-o", self.tmp_filename, "-"], 146+ ["@cuneiform@/bin/cuneiform", "-f", "text", "-o", self.tmp_filename, "-"], 147 stdin=subprocess.PIPE, stdout=subprocess.PIPE, 148 stderr=subprocess.STDOUT 149 ) 150@@ -125,7 +125,7 @@ class TestCuneiformTxt(BaseTest): 151 builder=self.builder) 152 self.assertEqual(output, self._get_file_content("text").strip()) 153 popen.assert_called_once_with( 154- ["cuneiform", "-l", "fra", "-f", "text", "-o", self.tmp_filename, 155+ ["@cuneiform@/bin/cuneiform", "-l", "fra", "-f", "text", "-o", self.tmp_filename, 156 "-"], 157 stdin=subprocess.PIPE, stdout=subprocess.PIPE, 158 stderr=subprocess.STDOUT 159@@ -142,7 +142,7 @@ class TestCuneiformTxt(BaseTest): 160 builder=self.builder) 161 self.assertEqual(output, self._get_file_content("text").strip()) 162 popen.assert_called_once_with( 163- ["cuneiform", "-f", "text", "-o", self.tmp_filename, "-"], 164+ ["@cuneiform@/bin/cuneiform", "-f", "text", "-o", self.tmp_filename, "-"], 165 stdin=subprocess.PIPE, stdout=subprocess.PIPE, 166 stderr=subprocess.STDOUT 167 ) 168@@ -173,7 +173,7 @@ class TestCuneiformTxt(BaseTest): 169 output = cuneiform.image_to_string(image, builder=self.builder) 170 self.assertEqual(output, self._get_file_content("text").strip()) 171 popen.assert_called_once_with( 172- ["cuneiform", "-f", "text", "-o", self.tmp_filename, "-"], 173+ ["@cuneiform@/bin/cuneiform", "-f", "text", "-o", self.tmp_filename, "-"], 174 stdin=subprocess.PIPE, stdout=subprocess.PIPE, 175 stderr=subprocess.STDOUT 176 ) 177@@ -227,7 +227,7 @@ class TestCuneiformWordBox(BaseTest): 178 output = cuneiform.image_to_string(self.image, 179 builder=self.builder) 180 popen.assert_called_once_with( 181- ["cuneiform", "-f", "hocr", "-o", self.tmp_filename, "-"], 182+ ["@cuneiform@/bin/cuneiform", "-f", "hocr", "-o", self.tmp_filename, "-"], 183 stdin=subprocess.PIPE, stdout=subprocess.PIPE, 184 stderr=subprocess.STDOUT 185 ) 186@@ -280,7 +280,7 @@ class TestCuneiformLineBox(BaseTest): 187 output = cuneiform.image_to_string(self.image, 188 builder=self.builder) 189 popen.assert_called_once_with( 190- ["cuneiform", "-f", "hocr", "-o", self.tmp_filename, "-"], 191+ ["@cuneiform@/bin/cuneiform", "-f", "hocr", "-o", self.tmp_filename, "-"], 192 stdin=subprocess.PIPE, stdout=subprocess.PIPE, 193 stderr=subprocess.STDOUT 194 ) 195diff --git a/tests/tests_libtesseract.py b/tests/tests_libtesseract.py 196index ad7fdc9..57e7a60 100644 197--- a/tests/tests_libtesseract.py 198+++ b/tests/tests_libtesseract.py 199@@ -165,7 +165,8 @@ class TestLibTesseractRaw(BaseTest): 200 args = libtess.TessBaseAPIInit3.call_args[0] 201 self.assertEqual(len(args), 3) 202 self.assertEqual(args[0].value, self.handle) 203- self.assertEqual(args[1].value, None) 204+ # we hardcode tesseract data, so we don't get None 205+ #self.assertEqual(args[1].value, None) 206 self.assertEqual(args[2].value, lang.encode() if lang else None) 207 208 self.assertEqual( 209@@ -201,7 +202,8 @@ class TestLibTesseractRaw(BaseTest): 210 args = libtess.TessBaseAPIInit3.call_args[0] 211 self.assertEqual(len(args), 3) 212 self.assertEqual(args[0].value, self.handle) 213- self.assertEqual(args[1].value, None) 214+ # we hardcode tesseract data, so we don't get None 215+ #self.assertEqual(args[1].value, None) 216 self.assertEqual(args[2].value, lang.encode() if lang else None) 217 218 self.assertEqual( 219diff --git a/tests/tests_tesseract.py b/tests/tests_tesseract.py 220index 1a55567..a24d96f 100644 221--- a/tests/tests_tesseract.py 222+++ b/tests/tests_tesseract.py 223@@ -36,7 +36,7 @@ class TestTesseract(BaseTest): 224 def test_available(self, which): 225 which.return_value = True 226 self.assertTrue(tesseract.is_available()) 227- which.assert_called_once_with("tesseract") 228+ which.assert_called_once_with("@tesseract@/bin/tesseract") 229 230 @patch("subprocess.Popen") 231 def test_version_error(self, popen): 232@@ -156,7 +156,7 @@ class TestTesseract(BaseTest): 233 for lang in ("eng", "fra", "jpn", "osd"): 234 self.assertIn(lang, langs) 235 popen.assert_called_once_with( 236- ["tesseract", "--list-langs"], 237+ ["@tesseract@/bin/tesseract", "--list-langs"], 238 startupinfo=None, creationflags=0, 239 stdout=subprocess.PIPE, stderr=subprocess.STDOUT 240 ) 241@@ -171,7 +171,7 @@ class TestTesseract(BaseTest): 242 self.assertEqual(te.exception.status, 1) 243 self.assertEqual("unable to get languages", te.exception.message) 244 popen.assert_called_once_with( 245- ["tesseract", "--list-langs"], 246+ ["@tesseract@/bin/tesseract", "--list-langs"], 247 startupinfo=None, creationflags=0, 248 stdout=subprocess.PIPE, stderr=subprocess.STDOUT 249 ) 250@@ -248,7 +248,7 @@ class TestTesseract(BaseTest): 251 self.assertEqual(status, 0) 252 self.assertEqual(error, message) 253 popen.assert_called_once_with( 254- ["tesseract", "input.bmp", "output"], 255+ ["@tesseract@/bin/tesseract", "input.bmp", "output"], 256 cwd=tmpdir, 257 startupinfo=None, 258 creationflags=0, 259@@ -271,7 +271,7 @@ class TestTesseract(BaseTest): 260 self.assertEqual(status, 0) 261 self.assertEqual(error, message) 262 popen.assert_called_with( 263- ["tesseract", "input2.bmp", "output2", "-l", "fra", "--psm", "3"], 264+ ["@tesseract@/bin/tesseract", "input2.bmp", "output2", "-l", "fra", "--psm", "3"], 265 cwd=tmpdir, 266 startupinfo=None, 267 creationflags=0, 268@@ -302,7 +302,7 @@ class TestTesseract(BaseTest): 269 self.assertEqual(result["angle"], 90) 270 self.assertEqual(result["confidence"], 9.30) 271 popen.assert_called_once_with( 272- ["tesseract", "input.bmp", "stdout", "--psm", "0"], 273+ ["@tesseract@/bin/tesseract", "input.bmp", "stdout", "--psm", "0"], 274 stdin=subprocess.PIPE, 275 shell=False, 276 startupinfo=None, 277@@ -338,7 +338,7 @@ class TestTesseract(BaseTest): 278 self.assertEqual(result["angle"], 90) 279 self.assertEqual(result["confidence"], 9.30) 280 popen.assert_called_once_with( 281- ["tesseract", "input.bmp", "stdout", "--psm", "0"], 282+ ["@tesseract@/bin/tesseract", "input.bmp", "stdout", "--psm", "0"], 283 stdin=subprocess.PIPE, 284 shell=False, 285 startupinfo=None, 286@@ -371,7 +371,7 @@ class TestTesseract(BaseTest): 287 self.assertEqual(result["angle"], 90) 288 self.assertEqual(result["confidence"], 9.30) 289 popen.assert_called_once_with( 290- ["tesseract", "input.bmp", "stdout", 291+ ["@tesseract@/bin/tesseract", "input.bmp", "stdout", 292 "--psm", "0", "-l", "osd"], 293 stdin=subprocess.PIPE, 294 shell=False, 295@@ -399,7 +399,7 @@ class TestTesseract(BaseTest): 296 with self.assertRaises(tesseract.TesseractError) as te: 297 tesseract.detect_orientation(self.image) 298 popen.assert_called_once_with( 299- ["tesseract", "input.bmp", "stdout", "--psm", "0"], 300+ ["@tesseract@/bin/tesseract", "input.bmp", "stdout", "--psm", "0"], 301 stdin=subprocess.PIPE, 302 shell=False, 303 startupinfo=None, 304@@ -433,7 +433,7 @@ class TestTesseract(BaseTest): 305 with self.assertRaises(tesseract.TesseractError) as te: 306 tesseract.detect_orientation(self.image) 307 popen.assert_called_once_with( 308- ["tesseract", "input.bmp", "stdout", "--psm", "0"], 309+ ["@tesseract@/bin/tesseract", "input.bmp", "stdout", "--psm", "0"], 310 stdin=subprocess.PIPE, 311 shell=False, 312 startupinfo=None, 313@@ -467,7 +467,7 @@ class TestTesseract(BaseTest): 314 self.assertEqual(result["angle"], 90) 315 self.assertEqual(result["confidence"], 9.30) 316 popen.assert_called_once_with( 317- ["tesseract", "input.bmp", "stdout", "-psm", "0"], 318+ ["@tesseract@/bin/tesseract", "input.bmp", "stdout", "-psm", "0"], 319 stdin=subprocess.PIPE, 320 shell=False, 321 startupinfo=None, 322@@ -500,7 +500,7 @@ class TestTesseract(BaseTest): 323 self.assertEqual(result["angle"], 90) 324 self.assertEqual(result["confidence"], 9.30) 325 popen.assert_called_once_with( 326- ["tesseract", "input.bmp", "stdout", "-psm", "0", "-l", "fra"], 327+ ["@tesseract@/bin/tesseract", "input.bmp", "stdout", "-psm", "0", "-l", "fra"], 328 stdin=subprocess.PIPE, 329 shell=False, 330 startupinfo=None, 331@@ -527,7 +527,7 @@ class TestTesseract(BaseTest): 332 with self.assertRaises(tesseract.TesseractError) as te: 333 tesseract.detect_orientation(self.image) 334 popen.assert_called_once_with( 335- ["tesseract", "input.bmp", "stdout", "-psm", "0"], 336+ ["@tesseract@/bin/tesseract", "input.bmp", "stdout", "-psm", "0"], 337 stdin=subprocess.PIPE, 338 shell=False, 339 startupinfo=None, 340@@ -561,7 +561,7 @@ class TestTesseract(BaseTest): 341 with self.assertRaises(tesseract.TesseractError) as te: 342 tesseract.detect_orientation(self.image) 343 popen.assert_called_once_with( 344- ["tesseract", "input.bmp", "stdout", "-psm", "0"], 345+ ["@tesseract@/bin/tesseract", "input.bmp", "stdout", "-psm", "0"], 346 stdin=subprocess.PIPE, 347 shell=False, 348 startupinfo=None,