···11-{ stdenv, fetchzip, fetchurl, fetchpatch, cmake, pkgconfig
22-, zlib, libpng
11+{ stdenv, runCommand, fetchzip, fetchurl, fetchpatch, fetchFromGitHub
22+, cmake, pkgconfig, zlib, libpng
33, enableGSL ? true, gsl
44, enableGhostScript ? true, ghostscript
55, enableMuPDF ? true, mupdf
···11111212with stdenv.lib;
13131414-stdenv.mkDerivation rec {
1515- pname = "k2pdfopt";
1616- version = "2.51a";
1414+# k2pdfopt is a pain to package. It requires modified versions of mupdf,
1515+# leptonica, and tesseract. Instead of shipping patches for these upstream
1616+# packages, k2pdfopt includes just the modified source files for these
1717+# packages. The individual files from the {mupdf,leptonica,tesseract}_mod/
1818+# directories are intended to replace the corresponding source files in the
1919+# upstream packages, for a particular version of that upstream package.
2020+#
2121+# There are a few ways we could approach packaging these modified versions of
2222+# mupdf, leptonica, and tesseract:
2323+# 1) Override the upstream source with a new derivation that involves copying
2424+# the modified source files from k2pdfopt and replacing the corresponding
2525+# source files in the upstream packages. Since the files are intended for a
2626+# particular version of the upstream package, this would not allow us to easily
2727+# use updates to those packages in nixpkgs.
2828+# 2) Manually produce patches which can be applied against the upstream
2929+# project, and have the same effect as replacing those files. This is what I
3030+# believe k2pdfopt should do for us anyway. The benefit of creating and
3131+# applying patches in this way is that minor updates (esp. security fixes) to
3232+# upstream packages might still allow these patches to apply successfully.
3333+# 3) Automatically produce these patches inside a nix derivation. This is the
3434+# approach taken here, using the "mkPatch" provided below. This has the
3535+# benefit of easier review and should hopefully be simpler to update in the
3636+# future.
3737+3838+let
3939+ # Create a patch against src based on changes applied in patchCommands
4040+ mkPatch = { name, src, patchCommands }: runCommand "${name}-k2pdfopt.patch" { inherit src; } ''
4141+ source $stdenv/setup
4242+ unpackPhase
17431818- src = (fetchzip {
1919- url = "http://www.willus.com/k2pdfopt/src/k2pdfopt_v2.51_src.zip";
2020- sha256 = "133l7xkvi67s6sfk8cfh7rmavbsf7ib5fyksk1ci6b6sch3z2sw9";
2121- });
4444+ orig=$sourceRoot
4545+ new=$sourceRoot-modded
4646+ cp -r $orig/. $new/
22472323- # Note: the v2.51a zip contains only files to be replaced in the v2.50 zip.
2424- v251a_src = (fetchzip {
2525- url = "http://www.willus.com/k2pdfopt/src/k2pdfopt_v2.51a_src.zip";
2626- sha256 = "0vvwblii7kgdwfxw8dzk6jbmz4dv94d7rkv18i60y8wkayj6yhl6";
2727- });
4848+ pushd $new >/dev/null
4949+ ${patchCommands}
5050+ popd >/dev/null
28512929- postUnpack = ''
3030- cp -r ${v251a_src}/* $sourceRoot
5252+ diff -Naur $orig $new > $out || true
3153 '';
32543333- patches = [ ./k2pdfopt.patch ./k2pdfopt-mupdf-1.16.1.patch ];
5555+ pname = "k2pdfopt";
5656+ version = "2.53";
5757+ k2pdfopt_src = fetchzip {
5858+ url = "http://www.willus.com/${pname}/src/${pname}_v${version}_src.zip";
5959+ sha256 = "1fna8bg3pascjfc3hmc6xn0xi2yh7f1qp0d344mw9hqanbnykyy8";
6060+ };
6161+in stdenv.mkDerivation rec {
6262+ inherit pname version;
6363+ src = k2pdfopt_src;
6464+6565+ patches = [
6666+ ./0001-Fix-CMakeLists.patch
6767+ ];
6868+6969+ postPatch = ''
7070+ substituteInPlace willuslib/bmpdjvu.c \
7171+ --replace "<djvu.h>" "<libdjvu/ddjvuapi.h>"
7272+ '';
34733574 nativeBuildInputs = [ cmake pkgconfig ];
36753776 buildInputs =
3877 let
3939- # The patches below were constructed by taking the files from k2pdfopt in
4040- # the {mupdf,leptonica,tesseract}_mod/ directories, replacing the
4141- # corresponding files in the respective source trees, resolving any errors
4242- # with more recent versions of these depencencies, and running diff.
4343- mupdf_modded = mupdf.overrideAttrs (attrs: {
4444- patches = attrs.patches ++ [ ./mupdf.patch ]; # Last verified with mupdf 1.16.1
7878+ # We use specific versions of these sources below to match the versions
7979+ # used in the k2pdfopt source. Note that this does _not_ need to match the
8080+ # version used elsewhere in nixpkgs, since it is only used to create the
8181+ # patch that can then be applied to the version in nixpkgs.
8282+ mupdf_patch = mkPatch {
8383+ name = "mupdf";
8484+ src = fetchurl {
8585+ url = "https://mupdf.com/downloads/archive/mupdf-1.17.0-source.tar.gz";
8686+ sha256 = "13nl9nrcx2awz9l83mlv2psi1lmn3hdnfwxvwgwiwbxlkjl3zqq0";
8787+ };
8888+ patchCommands = ''
8989+ cp ${k2pdfopt_src}/mupdf_mod/{filter-basic,font,stext-device,string}.c ./source/fitz/
9090+ cp ${k2pdfopt_src}/mupdf_mod/pdf-* ./source/pdf/
9191+ '';
9292+ };
9393+ mupdf_modded = mupdf.overrideAttrs ({ patches ? [], ... }: {
9494+ patches = patches ++ [ mupdf_patch ];
9595+ # This function is missing in font.c, see font-win32.c
9696+ postPatch = ''
9797+ echo "void pdf_install_load_system_font_funcs(fz_context *ctx) {}" >> source/fitz/font.c
9898+ '';
4599 });
4646- leptonica_modded = leptonica.overrideAttrs (attrs: {
4747- patches = [ ./leptonica.patch ]; # Last verified with leptonica 1.78.0
100100+101101+ leptonica_patch = mkPatch {
102102+ name = "leptonica";
103103+ src = fetchurl {
104104+ url = "http://www.leptonica.org/source/leptonica-1.79.0.tar.gz";
105105+ sha256 = "1n004gv1dj3pq1fcnfdclvvx5nang80336aa67nvs3nnqp4ncn84";
106106+ };
107107+ patchCommands = "cp -r ${k2pdfopt_src}/leptonica_mod/. ./src/";
108108+ };
109109+ leptonica_modded = leptonica.overrideAttrs ({ patches ? [], ... }: {
110110+ patches = patches ++ [ leptonica_patch ];
48111 });
112112+113113+ tesseract_patch = mkPatch {
114114+ name = "tesseract";
115115+ src = fetchFromGitHub {
116116+ owner = "tesseract-ocr";
117117+ repo = "tesseract";
118118+ rev = "4.1.1";
119119+ sha256 = "1ca27zbjpx35nxh9fha410z3jskwyj06i5hqiqdc08s2d7kdivwn";
120120+ };
121121+ patchCommands = ''
122122+ cp ${k2pdfopt_src}/tesseract_mod/{baseapi,tesscapi,tesseract}.* src/api/
123123+ cp ${k2pdfopt_src}/tesseract_mod/{tesscapi,tessedit,tesseract}.* src/ccmain/
124124+ cp ${k2pdfopt_src}/tesseract_mod/dotproduct{avx,fma,sse}.* src/arch/
125125+ cp ${k2pdfopt_src}/tesseract_mod/{intsimdmatrixsse,simddetect}.* src/arch/
126126+ cp ${k2pdfopt_src}/tesseract_mod/{errcode,genericvector,mainblk,params,serialis,tessdatamanager,tess_version,tprintf,unicharset}.* src/ccutil/
127127+ cp ${k2pdfopt_src}/tesseract_mod/{input,lstmrecognizer}.* src/lstm/
128128+ cp ${k2pdfopt_src}/tesseract_mod/openclwrapper.* src/opencl/
129129+ '';
130130+ };
49131 tesseract_modded = tesseract4.override {
5050- tesseractBase = tesseract4.tesseractBase.overrideAttrs (_: {
5151- patches = [ ./tesseract.patch ]; # Last verified with tesseract 1.4
132132+ tesseractBase = tesseract4.tesseractBase.overrideAttrs ({ patches ? [], ... }: {
133133+ patches = patches ++ [ tesseract_patch ];
134134+ # Additional compilation fixes
135135+ postPatch = ''
136136+ echo libtesseract_api_la_SOURCES += tesscapi.cpp >> src/api/Makefile.am
137137+ substituteInPlace src/api/tesseract.h \
138138+ --replace "#include <leptonica.h>" "//#include <leptonica.h>"
139139+ '';
52140 });
53141 };
54142 in