tesseract: make tessdata a fixed-output derivation (#41227)

The full tessdata is nearly 1 GB, so avoiding a fresh copy every time
tesseract is rebuilt without a tessdata update is worth it.

authored by symphorien and committed by xeji b30d5290 88070395

+34 -30
+34 -30
pkgs/applications/graphics/tesseract/default.nix
··· 1 { stdenv, fetchFromGitHub, autoreconfHook, pkgconfig 2 , leptonica, libpng, libtiff, icu, pango, opencl-headers 3 - 4 # Supported list of languages or `null' for all available languages 5 , enableLanguages ? null 6 }: 7 8 stdenv.mkDerivation rec { 9 name = "tesseract-${version}"; 10 version = "3.05.00"; ··· 16 sha256 = "11wrpcfl118wxsv2c3w2scznwb48c4547qml42s2bpdz079g8y30"; 17 }; 18 19 - tessdata = fetchFromGitHub { 20 - owner = "tesseract-ocr"; 21 - repo = "tessdata"; 22 - rev = "3cf1e2df1fe1d1da29295c9ef0983796c7958b7d"; 23 - sha256 = "1v4b63v5nzcxr2y3635r19l7lj5smjmc9vfk0wmxlryxncb4vpg7"; 24 - }; 25 26 nativeBuildInputs = [ pkgconfig autoreconfHook ]; 27 buildInputs = [ leptonica libpng libtiff icu pango opencl-headers ]; 28 29 LIBLEPT_HEADERSDIR = "${leptonica}/include"; 30 31 - # Copy the .traineddata files of the languages specified in enableLanguages 32 - # into `$out/share/tessdata' and check afterwards if copying was successful. 33 - postInstall = let 34 - mkArg = lang: "-iname ${stdenv.lib.escapeShellArg "${lang}.traineddata"}"; 35 - mkFindArgs = stdenv.lib.concatMapStringsSep " -o " mkArg; 36 - findLangArgs = if enableLanguages != null 37 - then "\\( ${mkFindArgs enableLanguages} \\)" 38 - else "-iname '*.traineddata'"; 39 - in '' 40 - numLangs="$(find "$tessdata" -mindepth 1 -maxdepth 1 -type f \ 41 - ${findLangArgs} -exec cp -t "$out/share/tessdata" {} + -print | wc -l)" 42 - 43 - ${if enableLanguages != null then '' 44 - expected=${toString (builtins.length enableLanguages)} 45 - '' else '' 46 - expected="$(ls -1 "$tessdata/"*.traineddata | wc -l)" 47 - ''} 48 - 49 - if [ "$numLangs" -ne "$expected" ]; then 50 - echo "Expected $expected languages, but $numLangs" \ 51 - "were copied to \`$out/share/tessdata'" >&2 52 - exit 1 53 - fi 54 ''; 55 56 meta = {
··· 1 { stdenv, fetchFromGitHub, autoreconfHook, pkgconfig 2 , leptonica, libpng, libtiff, icu, pango, opencl-headers 3 # Supported list of languages or `null' for all available languages 4 , enableLanguages ? null 5 + # if you want just a specific list of languages, optionally specify a hash 6 + # to make tessdata a fixed output derivation. 7 + , enableLanguagesHash ? (if enableLanguages == null # all languages 8 + then "1h48xfzabhn0ldbx5ib67cp9607pr0zpblsy8z6fs4knn0zznfnw" 9 + else null) 10 }: 11 12 + let tessdata = stdenv.mkDerivation ({ 13 + name = "tessdata"; 14 + src = fetchFromGitHub { 15 + owner = "tesseract-ocr"; 16 + repo = "tessdata"; 17 + rev = "3cf1e2df1fe1d1da29295c9ef0983796c7958b7d"; 18 + # when updating don't forget to update the default value fo enableLanguagesHash 19 + sha256 = "1v4b63v5nzcxr2y3635r19l7lj5smjmc9vfk0wmxlryxncb4vpg7"; 20 + }; 21 + buildCommand = '' 22 + cd $src; 23 + for lang in ${if enableLanguages==null then "*.traineddata" else stdenv.lib.concatMapStringsSep " " (x: x+".traineddata") enableLanguages} ; do 24 + install -Dt $out/share/tessdata $src/$lang ; 25 + done; 26 + ''; 27 + preferLocalBuild = true; 28 + } // (stdenv.lib.optionalAttrs (enableLanguagesHash != null) { 29 + # when a hash is given, we make this a fixed output derivation. 30 + outputHashMode = "recursive"; 31 + outputHashAlgo = "sha256"; 32 + outputHash = enableLanguagesHash; 33 + })); 34 + in 35 + 36 stdenv.mkDerivation rec { 37 name = "tesseract-${version}"; 38 version = "3.05.00"; ··· 44 sha256 = "11wrpcfl118wxsv2c3w2scznwb48c4547qml42s2bpdz079g8y30"; 45 }; 46 47 + enableParallelBuilding = true; 48 49 nativeBuildInputs = [ pkgconfig autoreconfHook ]; 50 buildInputs = [ leptonica libpng libtiff icu pango opencl-headers ]; 51 52 LIBLEPT_HEADERSDIR = "${leptonica}/include"; 53 54 + postInstall = '' 55 + for i in ${tessdata}/share/tessdata/*; do 56 + ln -s $i $out/share/tessdata; 57 + done 58 ''; 59 60 meta = {