# Package expression for OCRmyPDF, built with buildPythonPackage from the
# tagged GitHub sources.
{ lib
, buildPythonPackage
, cffi
, coloredlogs
, fetchFromGitHub
, ghostscript
, img2pdf
, importlib-metadata
, importlib-resources
, jbig2enc
, leptonica
, pdfminer
, pikepdf
, pillow
, pluggy
, pngquant
, pytest-xdist
, pytestCheckHook
, pythonOlder
, reportlab
, setuptools
, setuptools-scm
, setuptools-scm-git-archive
, stdenv
, substituteAll
, tesseract4
, tqdm
, unpaper
}:

let
  # Absolute path of the executable `exe` inside the "bin" output of `pkg`.
  binOf = pkg: exe: "${lib.getBin pkg}/bin/${exe}";

  # ./paths.patch with the variables below substituted into it, so that the
  # external tools and the leptonica shared library are referenced by
  # absolute store path.
  # NOTE(review): the patch itself is not visible from this file; presumably
  # it replaces PATH / library lookups in the Python sources -- confirm.
  toolPathsPatch = substituteAll {
    src = ./paths.patch;
    gs = binOf ghostscript "gs";
    jbig2 = binOf jbig2enc "jbig2";
    pngquant = binOf pngquant "pngquant";
    tesseract = binOf tesseract4 "tesseract";
    unpaper = binOf unpaper "unpaper";
    # The shared-library suffix is taken from the host platform.
    liblept = "${lib.getLib leptonica}/lib/liblept${stdenv.hostPlatform.extensions.sharedLibrary}";
  };

  # Dependencies that are only added for older Python interpreters.
  interpreterBackports =
    lib.optionals (pythonOlder "3.8") [ importlib-metadata ]
    ++ lib.optionals (pythonOlder "3.9") [ importlib-resources ];
in

buildPythonPackage rec {
  pname = "ocrmypdf";
  version = "12.7.2";

  src = fetchFromGitHub {
    owner = "jbarlow83";
    repo = "OCRmyPDF";
    rev = "v${version}";
    # .git_archival.txt has its content substituted when the tarball is
    # created, so the fetched sources stop being reproducible once master
    # no longer points to the tag. Remove the file to keep the hash stable.
    # See https://github.com/jbarlow83/OCRmyPDF/issues/841
    extraPostFetch = ''
      rm "$out/.git_archival.txt"
    '';
    sha256 = "sha256-+mh7NgAk7R/94FXjRV+SLy478pZwYLLS8HwCazEbMf4=";
  };

  # Hand the package version to setuptools-scm through the environment.
  # NOTE(review): presumably required because .git_archival.txt is removed
  # from the sources above -- confirm.
  SETUPTOOLS_SCM_PRETEND_VERSION = version;

  patches = [ toolPathsPatch ];

  nativeBuildInputs = [
    setuptools-scm-git-archive
    setuptools-scm
  ];

  propagatedBuildInputs = [
    cffi
    coloredlogs
    img2pdf
    pdfminer
    pikepdf
    pillow
    pluggy
    reportlab
    setuptools
    tqdm
  ] ++ interpreterBackports;

  checkInputs = [
    pytest-xdist
    pytestCheckHook
  ];

  pythonImportsCheck = [ "ocrmypdf" ];

  meta = {
    homepage = "https://github.com/jbarlow83/OCRmyPDF";
    description = "Adds an OCR text layer to scanned PDF files, allowing them to be searched";
    changelog = "https://github.com/jbarlow83/OCRmyPDF/blob/v${version}/docs/release_notes.rst";
    license = [ lib.licenses.mpl20 lib.licenses.mit ];
    platforms = lib.platforms.linux;
    maintainers = [ lib.maintainers.kiwi lib.maintainers.dotlambda ];
  };
}