k2pdfopt: 2.32 -> 2.42

+162 -170
+77 -90
pkgs/applications/misc/k2pdfopt/default.nix
··· 1 - # Build procedure lifted from https://aur.archlinux.org/packages/k2/k2pdfopt/PKGBUILD 2 - { stdenv, fetchzip, fetchurl, writeScript, libX11, libXext, autoconf, automake, libtool 3 - , leptonica, libpng, libtiff, zlib, openjpeg, freetype, jbig2dec, djvulibre 4 - , openssl }: 5 - 6 - let 7 - mupdf_src = fetchurl { 8 - url = http://www.mupdf.com/downloads/archive/mupdf-1.6-source.tar.gz; 9 - sha256 = "0qx51rj6alzcagcixm59rvdpm54w6syrwr4184v439jh14ryw4wq"; 10 - }; 11 - 12 - tess_src = fetchurl { 13 - url = http://tesseract-ocr.googlecode.com/files/tesseract-ocr-3.02.02.tar.gz; 14 - sha256 = "0g81m9y4iydp7kgr56mlkvjdwpp3mb01q385yhdnyvra7z5kkk96"; 15 - }; 16 17 - gocr_src = fetchurl { 18 - url = http://www-e.uni-magdeburg.de/jschulen/ocr/gocr-0.49.tar.gz; 19 - sha256 = "06hpzp7rkkwfr1fvmc8kcfz9v490i9yir7f7imh13gmka0fr6afc"; 20 - }; 21 22 - in stdenv.mkDerivation rec { 23 name = "k2pdfopt-${version}"; 24 - version = "2.32"; 25 src = fetchzip { 26 url = "http://www.willus.com/k2pdfopt/src/k2pdfopt_v${version}_src.zip"; 27 - sha256 = "1v3cj5bwpjvy7s66sfqcmkxs91f7nxaykjpdjm2wn87vn6q7n19m"; 28 }; 29 30 - buildInputs = [ libX11 libXext autoconf automake libtool leptonica libpng libtiff zlib 31 - openjpeg freetype jbig2dec djvulibre openssl ]; 32 - NIX_LDFLAGS = "-lX11 -lXext"; 33 34 - hardeningDisable = [ "format" ]; 35 36 - k2_pa = ./k2pdfopt.patch; 37 - tess_pa = ./tesseract.patch; 38 39 - builder = writeScript "builder.sh" '' 40 - . 
${stdenv}/setup 41 - set -e 42 - 43 - plibs=`pwd`/patched_libraries 44 - 45 - tar zxf ${mupdf_src} 46 - cp $src/mupdf_mod/font.c $src/mupdf_mod/string.c mupdf-1.6-source/source/fitz/ 47 - cp $src/mupdf_mod/pdf-* mupdf-1.6-source/source/pdf 48 - 49 - tar zxf ${tess_src} 50 - cp $src/tesseract_mod/dawg.cpp tesseract-ocr/dict 51 - cp $src/tesseract_mod/tessdatamanager.cpp tesseract-ocr/ccutil 52 - cp $src/tesseract_mod/tessedit.cpp tesseract-ocr/ccmain 53 - cp $src/tesseract_mod/tesscapi.cpp tesseract-ocr/api 54 - cp $src/include_mod/tesseract.h $src/include_mod/leptonica.h tesseract-ocr/api 55 56 - cp -a $src k2pdfopt_v2.21 57 - chmod -R +w k2pdfopt_v2.21 58 59 - patch -p0 -i $tess_pa 60 - patch -p0 -i $k2_pa 61 62 - cd tesseract-ocr 63 - ./autogen.sh 64 - substituteInPlace "configure" \ 65 - --replace 'LIBLEPT_HEADERSDIR="/usr/local/include /usr/include"' \ 66 - 'LIBLEPT_HEADERSDIR=${leptonica}/include' 67 - ./configure --prefix=$plibs --disable-shared 68 - make install 69 - 70 - cd .. 71 - tar zxf ${gocr_src} 72 - cd gocr-0.49 73 - ./configure 74 - cp src/{gocr.h,pnm.h,unicode.h,list.h} $plibs/include 75 - cp include/config.h $plibs/include 76 - make libs 77 - cp src/libPgm2asc.a $plibs/lib 78 - 79 - cd ../mupdf-1.6-source 80 - make prefix=$plibs install 81 - install -Dm644 build/debug/libmujs.a $plibs/lib 82 - 83 - cd ../k2pdfopt_v2.21/k2pdfoptlib 84 - gcc -Ofast -Wall -c *.c -I ../include_mod/ -I $plibs/include \ 85 - -I . -I ../willuslib 86 - ar rcs libk2pdfopt.a *.o 87 - 88 - cd ../willuslib 89 - gcc -Ofast -Wall -c *.c -I ../include_mod/ -I $plibs/include 90 - ar rcs libwillus.a *.o 91 - 92 - cd .. 
93 - gcc -Wall -Ofast -o k2pdfopt.o -c k2pdfopt.c -I k2pdfoptlib/ -I willuslib/ \ 94 - -I include_mod/ -I $plibs/include 95 - g++ -Ofast k2pdfopt.o -o k2pdfopt -I willuslib/ -I k2pdfoptlib/ -I include_mod/ \ 96 - -I $plibs/include -L $plibs/lib/ \ 97 - -L willuslib/ -L k2pdfoptlib/ -lk2pdfopt -lwillus -ldjvulibre -lz -lmupdf \ 98 - -ljbig2dec -ljpeg -lopenjp2 -lpng -lfreetype -lpthread -lmujs \ 99 - -lPgm2asc -llept -ltesseract -lcrypto 100 - 101 - mkdir -p $out/bin 102 - cp k2pdfopt $out/bin 103 ''; 104 105 meta = with stdenv.lib; { ··· 107 homepage = http://www.willus.com/k2pdfopt; 108 license = licenses.gpl3; 109 platforms = platforms.linux; 110 - maintainers = [ maintainers.bosu ]; 111 }; 112 } 113
··· 1 + { stdenv, fetchzip, fetchurl, fetchpatch, cmake, pkgconfig 2 + , zlib, libpng 3 + , enableGSL ? true, gsl 4 + , enableGhostScript ? true, ghostscript 5 + , enableMuPDF ? true, jbig2dec, openjpeg, freetype, harfbuzz, mupdf 6 + , enableJPEG2K ? true, jasper 7 + , enableDJVU ? true, djvulibre 8 + , enableGOCR ? false, gocr # Disabled by default due to crashes 9 + , enableTesseract ? true, leptonica, tesseract 10 + }: 11 12 + with stdenv.lib; 13 14 + stdenv.mkDerivation rec { 15 name = "k2pdfopt-${version}"; 16 + version = "2.42"; 17 + 18 src = fetchzip { 19 url = "http://www.willus.com/k2pdfopt/src/k2pdfopt_v${version}_src.zip"; 20 + sha256 = "1zag4jmkr0qrcpqqb5davmvdrabhdyz87q4zz0xpfkl6xw2dn9bk"; 21 }; 22 23 + patches = [ ./k2pdfopt.patch ]; 24 25 + nativeBuildInputs = [ cmake pkgconfig ]; 26 27 + buildInputs = 28 + let 29 + mupdf_modded = mupdf.overrideAttrs (attrs: { 30 + name = "mupdf-1.10a"; 31 + src = fetchurl { 32 + url = "http://mupdf.com/downloads/archive/mupdf-1.10a-source.tar.gz"; 33 + sha256 = "0dm8wcs8i29aibzkqkrn8kcnk4q0kd1v66pg48h5c3qqp4v1zk5a"; 34 + }; 35 + # Excluded the pdf-*.c files, since they mostly just broke the #includes 36 + prePatch = '' 37 + cp ${src}/mupdf_mod/{font,stext-device,string}.c source/fitz/ 38 + cp ${src}/mupdf_mod/font-win32.c source/pdf/ 39 + ''; 40 + # Patches from previous 1.10a version in nixpkgs 41 + patches = [ 42 + # Compatibility with new openjpeg 43 + (fetchpatch { 44 + name = "mupdf-1.9a-openjpeg-2.1.1.patch"; 45 + url = "https://git.archlinux.org/svntogit/community.git/plain/mupdf/trunk/0001-mupdf-openjpeg.patch?id=5a28ad0a8999a9234aa7848096041992cc988099"; 46 + sha256 = "1i24qr4xagyapx4bijjfksj4g3bxz8vs5c2mn61nkm29c63knp75"; 47 + }) 48 49 + (fetchurl { 50 + name = "CVE-2017-5896.patch"; 51 + url = "http://git.ghostscript.com/?p=mupdf.git;a=patch;h=2c4e5867ee699b1081527bc6c6ea0e99a35a5c27"; 52 + sha256 = "14k7x47ifx82sds1c06ibzbmcparfg80719jhgwjk6w1vkh4r693"; 53 + }) 54 + ]; 55 + }); 56 + leptonica_modded = 
leptonica.overrideAttrs (attrs: { 57 + prePatch = '' 58 + cp ${src}/leptonica_mod/* src/ 59 + ''; 60 + }); 61 + tesseract_modded = tesseract.overrideAttrs (attrs: { 62 + prePatch = '' 63 + cp ${src}/tesseract_mod/{ambigs.cpp,ccutil.h,ccutil.cpp,tessdatamanager.cpp} ccutil/ 64 + cp ${src}/tesseract_mod/dawg.cpp dict/ 65 + cp ${src}/tesseract_mod/imagedata.cpp ccstruct/ 66 + cp ${src}/tesseract_mod/openclwrapper.h opencl/ 67 + cp ${src}/tesseract_mod/{tessedit.cpp,thresholder.cpp} ccmain/ 68 + cp ${src}/tesseract_mod/tess_lang_mod_edge.h cube/ 69 + cp ${src}/tesseract_mod/tesscapi.cpp api/ 70 + cp ${src}/include_mod/{tesseract.h,leptonica.h} api/ 71 + ''; 72 + patches = [ ./tesseract.patch ]; 73 + }); 74 + in 75 + [ zlib libpng ] ++ 76 + optional enableGSL gsl ++ 77 + optional enableGhostScript ghostscript ++ 78 + optionals enableMuPDF [ jbig2dec openjpeg freetype harfbuzz mupdf_modded ] ++ 79 + optionals enableJPEG2K [ jasper ] ++ 80 + optional enableDJVU djvulibre ++ 81 + optional enableGOCR gocr ++ 82 + optionals enableTesseract [ leptonica_modded tesseract_modded ]; 83 84 + dontUseCmakeBuildDir = true; 85 86 + cmakeFlags = [ "-DCMAKE_C_FLAGS=-I${src}/include_mod" ]; 87 88 + installPhase = '' 89 + install -D -m 755 k2pdfopt $out/bin/k2pdfopt 90 ''; 91 92 meta = with stdenv.lib; { ··· 94 homepage = http://www.willus.com/k2pdfopt; 95 license = licenses.gpl3; 96 platforms = platforms.linux; 97 + maintainers = with maintainers; [ bosu danielfullmer ]; 98 }; 99 } 100
+78 -74
pkgs/applications/misc/k2pdfopt/k2pdfopt.patch
··· 1 - diff -aur k2pdfopt_v2.21/willuslib/array.c k2pdfopt_v2.21.new/willuslib/array.c 2 - --- k2pdfopt_v2.21/willuslib/array.c 2014-05-23 16:29:58.000000000 -0300 3 - +++ k2pdfopt_v2.21.new/willuslib/array.c 2014-07-26 11:35:49.829825567 -0300 4 - @@ -1055,7 +1055,7 @@ 5 - void arrayf_sort(float *a,int n) 6 7 - { 8 - - sort(a,(long)n); 9 - + willus_sort(a,(long)n); 10 - } 11 12 13 - diff -aur k2pdfopt_v2.21/willuslib/math.c k2pdfopt_v2.21.new/willuslib/math.c 14 - --- k2pdfopt_v2.21/willuslib/math.c 2013-08-15 21:33:50.000000000 -0300 15 - +++ k2pdfopt_v2.21.new/willuslib/math.c 2014-07-26 11:36:02.853170659 -0300 16 - @@ -532,7 +532,7 @@ 17 18 19 20 - -void sort(float *x,int n) 21 - +void willus_sort(float *x,int n) 22 23 - { 24 - int top,n1; 25 - diff -aur k2pdfopt_v2.21/willuslib/ocrjocr.c k2pdfopt_v2.21.new/willuslib/ocrjocr.c 26 - --- k2pdfopt_v2.21/willuslib/ocrjocr.c 2012-11-12 13:09:42.000000000 -0300 27 - +++ k2pdfopt_v2.21.new/willuslib/ocrjocr.c 2014-07-26 11:36:46.699837185 -0300 28 @@ -29,6 +29,8 @@ 29 #ifdef HAVE_GOCR_LIB 30 #include <gocr.h> 31 32 - +job_t *JOB; 33 + 34 /* 35 ** bmp8 must be grayscale 36 ** (x1,y1) and (x2,y2) from top left of bitmap 37 - @@ -66,6 +68,7 @@ 38 h=y2-y1+1; 39 dh=h+bw*2; 40 job=&_job; 41 - + JOB=job; 42 job_init(job); 43 job_init_image(job); 44 // willus_mem_alloc_warn((void **)&job->src.p.p,w*h,funcname,10); 45 - diff -aur k2pdfopt_v2.21/willuslib/string.c k2pdfopt_v2.21.new/willuslib/string.c 46 - --- k2pdfopt_v2.21/willuslib/string.c 2014-02-03 00:37:44.000000000 -0300 47 - +++ k2pdfopt_v2.21.new/willuslib/string.c 2014-07-26 11:37:01.766506277 -0300 48 - @@ -81,7 +81,7 @@ 49 - ** Returns NULL if EOF, otherwise returns pointer to the string. 
50 - ** 51 - */ 52 - -char *get_line(char *buf,int max,FILE *f) 53 - +char *willus_get_line(char *buf,int max,FILE *f) 54 - 55 - { 56 - int i; 57 - diff -aur k2pdfopt_v2.21/willuslib/willus.h k2pdfopt_v2.21.new/willuslib/willus.h 58 - --- k2pdfopt_v2.21/willuslib/willus.h 2014-07-25 15:03:51.000000000 -0300 59 - +++ k2pdfopt_v2.21.new/willuslib/willus.h 2014-07-26 11:37:56.316506038 -0300 60 - @@ -214,9 +214,6 @@ 61 - ** CMAKE handles the defines, not this source 62 - ** (Mod from Dirk Thierbach, 31-Dec-2013) 63 - */ 64 - -#ifdef USE_CMAKE 65 - -#include "config.h" 66 - -#else /* USE_CMAKE */ 67 - 68 - #ifndef HAVE_Z_LIB 69 - #define HAVE_Z_LIB 70 - @@ -268,7 +265,6 @@ 71 - #undef HAVE_GSL_LIB 72 - #endif 73 - 74 - -#endif /* USE_CMAKE */ 75 - /* 76 - ** Consistency check 77 - */ 78 - @@ -533,7 +529,7 @@ 79 - int *n,FILE *err); 80 - int readxyz_ex (char *filename,double **x,double **y,double **z, 81 - int *n,FILE *err,int ignore_after_semicolon); 82 - -void sort (float *x,int n); 83 - +void willus_sort (float *x,int n); 84 - void sortd (double *x,int n); 85 - void sorti (int *x,int n); 86 - void sortxy (float *x,float *y,int n); 87 - @@ -602,7 +598,7 @@ 88 - /* string.c */ 89 - void clean_line (char *buf); 90 - void clean_line_end(char *buf); 91 - -char *get_line (char *buf,int max,FILE *f); 92 - +char *willus_get_line (char *buf,int max,FILE *f); 93 - char *get_line_cf (char *buf,int max,FILE *f); 94 - int mem_get_line_cf(char *buf,int maxlen,char *cptr,long *cindex,long csize); 95 - int in_string (char *buffer,char *pattern);
··· 1 + diff --git a/CMakeLists.txt b/CMakeLists.txt 2 + index 4a2378b..502c477 100644 3 + --- a/CMakeLists.txt 4 + +++ b/CMakeLists.txt 5 + @@ -52,6 +52,7 @@ endif(JPEG_FOUND) 6 + include(FindJasper) 7 + if(JASPER_FOUND) 8 + set(HAVE_JASPER_LIB 1) 9 + + set(K2PDFOPT_LIB ${K2PDFOPT_LIB} ${JASPER_LIBRARY}) 10 + endif(JASPER_FOUND) 11 12 + # paths from willuslib/wgs.c 13 + @@ -66,8 +67,12 @@ else() 14 + message(STATUS "Could NOT find ghostscript executable") 15 + endif(GHOSTSCRIPT_EXECUTABLE) 16 17 + -# willus.h 18 + -# HAVE_GSL_LIB 19 + +pkg_check_modules(GSL gsl) 20 + +if(GSL_FOUND) 21 + + set(HAVE_GSL_LIB 1) 22 + + include_directories(SYSTEM ${GSL_INCLUDEDIR}) 23 + + set(K2PDFOPT_LIB ${K2PDFOPT_LIB} ${GSL_LDFLAGS}) 24 + +endif(GSL_FOUND) 25 26 27 + # libfreetype6 (>= 2.3.9), libjbig2dec0, libjpeg8 (>= 8c), libx11-6, libxext6, zlib1g (>= 1:1.2.0) 28 + @@ -80,7 +85,7 @@ if(MUPDF_FOUND) 29 + include_directories(SYSTEM ${MUPDF_INCLUDEDIR}) 30 + message(STATUS "mupdf libraries: ${MUPDF_LDFLAGS}") 31 + set(K2PDFOPT_LIB ${K2PDFOPT_LIB} ${MUPDF_LDFLAGS} 32 + - -lmupdf-js-none -lopenjpeg -ljbig2dec -ljpeg -lfreetype 33 + + -lopenjp2 -ljbig2dec -ljpeg -lfreetype -lharfbuzz 34 + ) 35 + endif(MUPDF_FOUND) 36 37 + @@ -91,9 +96,25 @@ if(DJVU_FOUND) 38 + set(K2PDFOPT_LIB ${K2PDFOPT_LIB} ${DJVU_LDFLAGS}) 39 + endif(DJVU_FOUND) 40 41 + -# HAVE_GOCR_LIB 42 + -# HAVE_LEPTONICA_LIB 43 + -# HAVE_TESSERACT_LIB 44 + +find_library(GOCR_LIB NAMES Pgm2asc) 45 + +if(GOCR_LIB) 46 + + set(HAVE_GOCR_LIB 1) 47 + + set(K2PDFOPT_LIB ${K2PDFOPT_LIB} ${GOCR_LIB}) 48 + +endif(GOCR_LIB) 49 + + 50 + +pkg_check_modules(LEPTONICA lept) 51 + +if(LEPTONICA_FOUND) 52 + + set(HAVE_LEPTONICA_LIB 1) 53 + + include_directories(SYSTEM ${LEPTONICA_INCLUDEDIR}) 54 + + set(K2PDFOPT_LIB ${K2PDFOPT_LIB} ${LEPTONICA_LDFLAGS}) 55 + +endif(LEPTONICA_FOUND) 56 + + 57 + +pkg_check_modules(TESSERACT tesseract) 58 + +if(TESSERACT_FOUND) 59 + + set(HAVE_TESSERACT_LIB 1) 60 + + include_directories(SYSTEM 
${TESSERACT_INCLUDEDIR}) 61 + + set(K2PDFOPT_LIB ${K2PDFOPT_LIB} ${TESSERACT_LDFLAGS}) 62 + +endif(TESSERACT_FOUND) 63 64 + # ---- Describe project 65 + 66 + diff --git a/willuslib/CMakeLists.txt b/willuslib/CMakeLists.txt 67 + index 463bbc9..8043db5 100644 68 + --- a/willuslib/CMakeLists.txt 69 + +++ b/willuslib/CMakeLists.txt 70 + @@ -6,7 +6,7 @@ include_directories(..) 71 + set(WILLUSLIB_SRC 72 + ansi.c array.c bmp.c bmpdjvu.c bmpmupdf.c dtcompress.c filelist.c 73 + fontdata.c fontrender.c gslpolyfit.c linux.c math.c mem.c ocr.c 74 + - ocrjocr.c ocrtess.c pdfwrite.c point2d.c render.c strbuf.c string.c 75 + + ocrgocr.c ocrtess.c pdfwrite.c point2d.c render.c strbuf.c string.c 76 + token.c wfile.c wgs.c wgui.c willusversion.c win.c winbmp.c 77 + wincomdlg.c winmbox.c winshell.c wmupdf.c wmupdfinfo.c wpdf.c wsys.c 78 + wzfile.c wleptonica.c 79 + diff --git a/willuslib/ocrgocr.c b/willuslib/ocrgocr.c 80 + index 6027e9a..fbe10f0 100644 81 + --- a/willuslib/ocrgocr.c 82 + +++ b/willuslib/ocrgocr.c 83 @@ -29,6 +29,8 @@ 84 #ifdef HAVE_GOCR_LIB 85 #include <gocr.h> 86 87 + +job_t *OCR_JOB; 88 + 89 /* 90 ** bmp8 must be grayscale 91 ** (x1,y1) and (x2,y2) from top left of bitmap 92 + @@ -63,6 +65,7 @@ void gocr_single_word_from_bmp8(char *text,int maxlen,WILLUSBITMAP *bmp8, 93 h=y2-y1+1; 94 dh=h+bw*2; 95 job=&_job; 96 + + OCR_JOB=job; 97 job_init(job); 98 job_init_image(job); 99 // willus_mem_alloc_warn((void **)&job->src.p.p,w*h,funcname,10);
+7 -6
pkgs/applications/misc/k2pdfopt/tesseract.patch
··· 1 - diff -aur tesseract-ocr/api/Makefile.am tesseract-ocr.new/api/Makefile.am 2 - --- tesseract-ocr/api/Makefile.am 2012-10-09 14:18:39.000000000 -0300 3 - +++ tesseract-ocr.new/api/Makefile.am 2014-03-20 18:43:13.926030341 -0300 4 - @@ -36,7 +36,7 @@ 5 if VISIBILITY 6 libtesseract_api_la_CPPFLAGS += -DTESS_EXPORTS 7 endif 8 - -libtesseract_api_la_SOURCES = baseapi.cpp capi.cpp 9 - +libtesseract_api_la_SOURCES = baseapi.cpp capi.cpp tesscapi.cpp 10 11 lib_LTLIBRARIES += libtesseract.la 12 libtesseract_la_LDFLAGS =
··· 1 + diff --git a/api/Makefile.am b/api/Makefile.am 2 + index d8c1e54..46ead13 100644 3 + --- a/api/Makefile.am 4 + +++ b/api/Makefile.am 5 + @@ -42,7 +42,7 @@ libtesseract_api_la_CPPFLAGS = $(AM_CPPFLAGS) 6 if VISIBILITY 7 libtesseract_api_la_CPPFLAGS += -DTESS_EXPORTS 8 endif 9 + -libtesseract_api_la_SOURCES = baseapi.cpp capi.cpp renderer.cpp pdfrenderer.cpp 10 + +libtesseract_api_la_SOURCES = baseapi.cpp capi.cpp renderer.cpp pdfrenderer.cpp tesscapi.cpp 11 12 lib_LTLIBRARIES += libtesseract.la 13 libtesseract_la_LDFLAGS =