# Package expression for OCRmyPDF, built with buildPythonPackage from the
# upstream GitHub repository. The external programs and the leptonica shared
# library it uses are substituted into ./paths.patch as absolute store paths.
{ lib
, buildPythonPackage
, cffi
, coloredlogs
, fetchFromGitHub
, ghostscript
, img2pdf
, importlib-metadata
, importlib-resources
, jbig2enc
, leptonica
, pdfminer
, pikepdf
, pillow
, pluggy
, pngquant
, pytest-xdist
, pytestCheckHook
, pythonOlder
, reportlab
, setuptools
, setuptools-scm
, setuptools-scm-git-archive
, stdenv
, substituteAll
, tesseract4
, tqdm
, unpaper
}:

let
  # Absolute path to executable `exe` inside the "bin" output of `pkg`.
  toolPath = pkg: exe: "${lib.getBin pkg}/bin/${exe}";

  # Platform-specific shared-library suffix taken from the host platform.
  sharedLibExt = stdenv.hostPlatform.extensions.sharedLibrary;
in

buildPythonPackage rec {
  pname = "ocrmypdf";
  version = "12.7.2";

  src = fetchFromGitHub {
    owner = "jbarlow83";
    repo = "OCRmyPDF";
    rev = "v${version}";
    sha256 = "sha256-+mh7NgAk7R/94FXjRV+SLy478pZwYLLS8HwCazEbMf4=";
    # .git_archival.txt is filled in when the tarball is generated, so its
    # content (and therefore the hash) changes once master moves past the
    # tag. Deleting it keeps the fetch deterministic.
    # See https://github.com/jbarlow83/OCRmyPDF/issues/841
    extraPostFetch = ''
      rm "$out/.git_archival.txt"
    '';
  };

  # NOTE(review): presumably needed because the archival metadata removed
  # above is no longer available for version detection -- confirm.
  SETUPTOOLS_SCM_PRETEND_VERSION = version;

  patches = [
    # Fill the placeholders of paths.patch with concrete store paths.
    (substituteAll {
      src = ./paths.patch;
      gs = toolPath ghostscript "gs";
      jbig2 = toolPath jbig2enc "jbig2";
      liblept = "${lib.getLib leptonica}/lib/liblept${sharedLibExt}";
      pngquant = toolPath pngquant "pngquant";
      tesseract = toolPath tesseract4 "tesseract";
      unpaper = toolPath unpaper "unpaper";
    })
  ];

  nativeBuildInputs = [
    setuptools-scm-git-archive
    setuptools-scm
  ];

  propagatedBuildInputs = [
    cffi
    coloredlogs
    img2pdf
    pdfminer
    pikepdf
    pillow
    pluggy
    reportlab
    setuptools
    tqdm
  ]
  # Backport packages are only added for older Python interpreters.
  ++ lib.optional (pythonOlder "3.8") importlib-metadata
  ++ lib.optional (pythonOlder "3.9") importlib-resources;

  checkInputs = [
    pytest-xdist
    pytestCheckHook
  ];

  pythonImportsCheck = [ "ocrmypdf" ];

  meta = {
    description = "Adds an OCR text layer to scanned PDF files, allowing them to be searched";
    homepage = "https://github.com/jbarlow83/OCRmyPDF";
    changelog = "https://github.com/jbarlow83/OCRmyPDF/blob/v${version}/docs/release_notes.rst";
    license = [ lib.licenses.mpl20 lib.licenses.mit ];
    maintainers = [ lib.maintainers.kiwi lib.maintainers.dotlambda ];
    platforms = lib.platforms.linux;
  };
}