nixpkgs mirror (for testing)
github.com/NixOS/nixpkgs
nix
# Package definition for OCRmyPDF, built as a Python package.
# Every argument below is expected to be supplied by the calling package set.
{
  lib,
  buildPythonPackage,
  coloredlogs,
  fetchFromGitHub,
  fetchpatch,
  ghostscript,
  img2pdf,
  importlib-metadata,
  importlib-resources,
  jbig2enc,
  pdfminer,
  pikepdf,
  pillow,
  pluggy,
  pngquant,
  pytest-xdist,
  pytestCheckHook,
  pythonOlder,
  reportlab,
  setuptools-scm,
  setuptools-scm-git-archive,
  stdenv,
  substituteAll,
  tesseract4,
  tqdm,
  unpaper,
}:

let
  # Path of the executable `exe` inside the binary output of `pkg`.
  toolPath = pkg: exe: "${lib.getBin pkg}/bin/${exe}";
in
buildPythonPackage rec {
  pname = "ocrmypdf";
  version = "13.4.4";

  src = fetchFromGitHub {
    owner = "jbarlow83";
    repo = "OCRmyPDF";
    rev = "v${version}";
    hash = "sha256-uFKnohUxh17h6u0vwVB7EaTEh5NRowP8a6za63Ehodk=";
    # .git_archival.txt gets its placeholders expanded when the tarball is
    # generated, so its content (and therefore the hash) is no longer
    # reproducible once master has moved away from the tag. Delete it.
    # Upstream report: https://github.com/jbarlow83/OCRmyPDF/issues/841
    postFetch = ''
      rm "$out/.git_archival.txt"
    '';
  };

  # Hand the version to setuptools-scm directly instead of letting it
  # derive one from the source tree.
  SETUPTOOLS_SCM_PRETEND_VERSION = version;

  patches = [
    # ./paths.patch is instantiated with the absolute locations of the
    # external command-line tools listed here.
    (substituteAll {
      src = ./paths.patch;
      gs = toolPath ghostscript "gs";
      jbig2 = toolPath jbig2enc "jbig2";
      pngquant = toolPath pngquant "pngquant";
      tesseract = toolPath tesseract4 "tesseract";
      unpaper = toolPath unpaper "unpaper";
    })
    # Upstream commit, see https://github.com/ocrmypdf/OCRmyPDF/pull/973
    (fetchpatch {
      url = "https://github.com/ocrmypdf/OCRmyPDF/commit/808b24d59f5b541a335006aa6ea7cdc3c991adc0.patch";
      hash = "sha256-khsH70fWk5fStf94wcRKKX7cCbgD69LtKkngJIqA3+w=";
    })
  ];

  # Removes the `<=20220319` upper bound on pdfminer.six from setup.cfg.
  postPatch = ''
    # https://github.com/ocrmypdf/OCRmyPDF/issues/933
    substituteInPlace setup.cfg \
    --replace "pdfminer.six!=20200720,>=20191110,<=20220319" "pdfminer.six!=20200720,>=20191110"
  '';

  nativeBuildInputs = [
    setuptools-scm-git-archive
    setuptools-scm
  ];

  # The importlib backports are only added for interpreters older than the
  # stated Python versions.
  propagatedBuildInputs =
    [
      coloredlogs
      img2pdf
      pdfminer
      pikepdf
      pillow
      pluggy
      reportlab
      tqdm
    ]
    ++ lib.optionals (pythonOlder "3.8") [ importlib-metadata ]
    ++ lib.optionals (pythonOlder "3.9") [ importlib-resources ];

  checkInputs = [
    pytest-xdist
    pytestCheckHook
  ];

  pythonImportsCheck = [ "ocrmypdf" ];

  meta = {
    description = "Adds an OCR text layer to scanned PDF files, allowing them to be searched";
    homepage = "https://github.com/jbarlow83/OCRmyPDF";
    changelog = "https://github.com/jbarlow83/OCRmyPDF/blob/v${version}/docs/release_notes.rst";
    license = [
      lib.licenses.mpl20
      lib.licenses.mit
    ];
    maintainers = [
      lib.maintainers.kiwi
      lib.maintainers.dotlambda
    ];
  };
}