# Package definition for OCRmyPDF, built with buildPythonPackage from a
# GitHub source checkout.
{
  # helpers
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  installShellFiles,
  pythonOlder,
  substituteAll,

  # build backend
  setuptools,
  setuptools-scm,

  # external programs whose absolute paths are substituted into ./paths.patch
  ghostscript,
  jbig2enc,
  pngquant,
  tesseract,
  unpaper,

  # Python dependencies propagated to dependents
  coloredlogs,
  deprecation,
  img2pdf,
  importlib-resources,
  packaging,
  pdfminer-six,
  pikepdf,
  pillow,
  pluggy,
  reportlab,
  tqdm,
  typing-extensions,

  # test-only dependencies
  pytest-xdist,
  pytestCheckHook,
}:

let
  version = "14.0.1";

  # Absolute path of executable `exe` inside the "bin" output of `pkg`.
  binPath = pkg: exe: "${lib.getBin pkg}/bin/${exe}";

  src = fetchFromGitHub {
    owner = "ocrmypdf";
    repo = "OCRmyPDF";
    rev = "v${version}";
    hash = "sha256-eYn24FkAXj/ESCoC0QaLY+wRhkxZP1KnuY4VU1WiG24=";
    # .git_archival.txt has its content substituted when the tarball is
    # created, so the fetched result stops being deterministic once master
    # no longer points to the tag. Drop the file to keep the hash stable.
    # See https://github.com/ocrmypdf/OCRmyPDF/issues/841
    postFetch = ''
      rm "$out/.git_archival.txt"
    '';
  };
in
buildPythonPackage {
  pname = "ocrmypdf";
  inherit version src;

  format = "pyproject";

  disabled = pythonOlder "3.8";

  # Hand the version to setuptools-scm explicitly.
  # NOTE(review): presumably required because .git_archival.txt is removed in
  # postFetch and the checkout carries no git metadata -- confirm.
  SETUPTOOLS_SCM_PRETEND_VERSION = version;

  patches = [
    # Each attribute below (other than src) is a value substituted into
    # ./paths.patch before the patch is applied.
    (substituteAll {
      src = ./paths.patch;
      gs = binPath ghostscript "gs";
      jbig2 = binPath jbig2enc "jbig2";
      pngquant = binPath pngquant "pngquant";
      tesseract = binPath tesseract "tesseract";
      unpaper = binPath unpaper "unpaper";
    })
  ];

  nativeBuildInputs = [
    setuptools
    setuptools-scm
    installShellFiles
  ];

  propagatedBuildInputs =
    [
      coloredlogs
      deprecation
      img2pdf
      packaging
      pdfminer-six
      pikepdf
      pillow
      pluggy
      reportlab
      tqdm
    ]
    # Only added when building for the older interpreters named in each guard.
    ++ lib.optionals (pythonOlder "3.9") [ importlib-resources ]
    ++ lib.optionals (pythonOlder "3.10") [ typing-extensions ];

  checkInputs = [
    pytest-xdist
    pytestCheckHook
  ];

  pythonImportsCheck = [ "ocrmypdf" ];

  # Install the bash and fish completion scripts shipped in the source tree.
  postInstall = ''
    installShellCompletion --cmd ocrmypdf \
      --bash misc/completion/ocrmypdf.bash \
      --fish misc/completion/ocrmypdf.fish
  '';

  meta = {
    description = "Adds an OCR text layer to scanned PDF files, allowing them to be searched";
    homepage = "https://github.com/ocrmypdf/OCRmyPDF";
    changelog = "https://github.com/ocrmypdf/OCRmyPDF/blob/${src.rev}/docs/release_notes.rst";
    license = [
      lib.licenses.mpl20
      lib.licenses.mit
    ];
    maintainers = [
      lib.maintainers.kiwi
      lib.maintainers.dotlambda
    ];
  };
}