Merge pull request #303388 from SuperSandro2000/paperless-only-enabled-languages

nixos/paperless: override the enabled tesseract languages with the ones configured in paperless

authored by Leona Maroni and committed by GitHub 025d3a2f b2307803

+30 -18
+24 -15
nixos/modules/services/misc/paperless.nix
··· 3 with lib; 4 let 5 cfg = config.services.paperless; 6 - pkg = cfg.package; 7 8 defaultUser = "paperless"; 9 defaultFont = "${pkgs.liberation_ttf}/share/fonts/truetype/LiberationSerif-Regular.ttf"; ··· 25 } // optionalAttrs (cfg.settings.PAPERLESS_ENABLE_NLTK or true) { 26 PAPERLESS_NLTK_DIR = pkgs.symlinkJoin { 27 name = "paperless_ngx_nltk_data"; 28 - paths = pkg.nltkData; 29 }; 30 } // optionalAttrs (cfg.openMPThreadingWorkaround) { 31 OMP_NUM_THREADS = "1"; ··· 38 manage = pkgs.writeShellScript "manage" '' 39 set -o allexport # Export the following env vars 40 ${lib.toShellVars env} 41 - exec ${pkg}/bin/paperless-ngx "$@" 42 ''; 43 44 # Secure the services ··· 200 description = "User under which Paperless runs."; 201 }; 202 203 - package = mkPackageOption pkgs "paperless-ngx" { }; 204 205 openMPThreadingWorkaround = mkEnableOption '' 206 a workaround for document classifier timeouts. ··· 237 wants = [ "paperless-consumer.service" "paperless-web.service" "paperless-task-queue.service" ]; 238 serviceConfig = defaultServiceConfig // { 239 User = cfg.user; 240 - ExecStart = "${pkg}/bin/celery --app paperless beat --loglevel INFO"; 241 Restart = "on-failure"; 242 LoadCredential = lib.optionalString (cfg.passwordFile != null) "PAPERLESS_ADMIN_PASSWORD:${cfg.passwordFile}"; 243 }; ··· 250 versionFile="${cfg.dataDir}/src-version" 251 version=$(cat "$versionFile" 2>/dev/null || echo 0) 252 253 - if [[ $version != ${pkg.version} ]]; then 254 - ${pkg}/bin/paperless-ngx migrate 255 256 # Parse old version string format for backwards compatibility 257 version=$(echo "$version" | grep -ohP '[^-]+$') ··· 264 if versionLessThan 1.12.0; then 265 # Reindex documents as mentioned in https://github.com/paperless-ngx/paperless-ngx/releases/tag/v1.12.1 266 echo "Reindexing documents, to allow searching old comments. Required after the 1.12.x upgrade." 
267 - ${pkg}/bin/paperless-ngx document_index reindex 268 fi 269 270 - echo ${pkg.version} > "$versionFile" 271 fi 272 '' 273 + optionalString (cfg.passwordFile != null) '' ··· 277 superuserStateFile="${cfg.dataDir}/superuser-state" 278 279 if [[ $(cat "$superuserStateFile" 2>/dev/null) != $superuserState ]]; then 280 - ${pkg}/bin/paperless-ngx manage_superuser 281 echo "$superuserState" > "$superuserStateFile" 282 fi 283 ''; ··· 290 after = [ "paperless-scheduler.service" ]; 291 serviceConfig = defaultServiceConfig // { 292 User = cfg.user; 293 - ExecStart = "${pkg}/bin/celery --app paperless worker --loglevel INFO"; 294 Restart = "on-failure"; 295 # The `mbind` syscall is needed for running the classifier. 296 SystemCallFilter = defaultServiceConfig.SystemCallFilter ++ [ "mbind" ]; ··· 308 after = [ "paperless-scheduler.service" ]; 309 serviceConfig = defaultServiceConfig // { 310 User = cfg.user; 311 - ExecStart = "${pkg}/bin/paperless-ngx document_consumer"; 312 Restart = "on-failure"; 313 }; 314 environment = env; ··· 340 echo "PAPERLESS_SECRET_KEY is empty, refusing to start." 341 exit 1 342 fi 343 - exec ${pkg.python.pkgs.gunicorn}/bin/gunicorn \ 344 - -c ${pkg}/lib/paperless-ngx/gunicorn.conf.py paperless.asgi:application 345 ''; 346 serviceConfig = defaultServiceConfig // { 347 User = cfg.user; ··· 357 CapabilityBoundingSet = [ "CAP_NET_BIND_SERVICE" ]; 358 }; 359 environment = env // { 360 - PYTHONPATH = "${pkg.python.pkgs.makePythonPath pkg.propagatedBuildInputs}:${pkg}/lib/paperless-ngx/src"; 361 }; 362 # Allow the web interface to access the private /tmp directory of the server. 363 # This is required to support uploading files via the web interface.
··· 3 with lib; 4 let 5 cfg = config.services.paperless; 6 7 defaultUser = "paperless"; 8 defaultFont = "${pkgs.liberation_ttf}/share/fonts/truetype/LiberationSerif-Regular.ttf"; ··· 24 } // optionalAttrs (cfg.settings.PAPERLESS_ENABLE_NLTK or true) { 25 PAPERLESS_NLTK_DIR = pkgs.symlinkJoin { 26 name = "paperless_ngx_nltk_data"; 27 + paths = cfg.package.nltkData; 28 }; 29 } // optionalAttrs (cfg.openMPThreadingWorkaround) { 30 OMP_NUM_THREADS = "1"; ··· 37 manage = pkgs.writeShellScript "manage" '' 38 set -o allexport # Export the following env vars 39 ${lib.toShellVars env} 40 + exec ${cfg.package}/bin/paperless-ngx "$@" 41 ''; 42 43 # Secure the services ··· 199 description = "User under which Paperless runs."; 200 }; 201 202 + package = mkPackageOption pkgs "paperless-ngx" { } // { 203 + apply = pkg: pkg.override { 204 + tesseract5 = pkg.tesseract5.override { 205 + # always enable detection modules 206 + enableLanguages = if cfg.settings ? PAPERLESS_OCR_LANGUAGE then 207 + [ "equ" "osd" ] 208 + ++ lib.splitString "+" cfg.settings.PAPERLESS_OCR_LANGUAGE 209 + else null; 210 + }; 211 + }; 212 + }; 213 214 openMPThreadingWorkaround = mkEnableOption '' 215 a workaround for document classifier timeouts. 
··· 246 wants = [ "paperless-consumer.service" "paperless-web.service" "paperless-task-queue.service" ]; 247 serviceConfig = defaultServiceConfig // { 248 User = cfg.user; 249 + ExecStart = "${cfg.package}/bin/celery --app paperless beat --loglevel INFO"; 250 Restart = "on-failure"; 251 LoadCredential = lib.optionalString (cfg.passwordFile != null) "PAPERLESS_ADMIN_PASSWORD:${cfg.passwordFile}"; 252 }; ··· 259 versionFile="${cfg.dataDir}/src-version" 260 version=$(cat "$versionFile" 2>/dev/null || echo 0) 261 262 + if [[ $version != ${cfg.package.version} ]]; then 263 + ${cfg.package}/bin/paperless-ngx migrate 264 265 # Parse old version string format for backwards compatibility 266 version=$(echo "$version" | grep -ohP '[^-]+$') ··· 273 if versionLessThan 1.12.0; then 274 # Reindex documents as mentioned in https://github.com/paperless-ngx/paperless-ngx/releases/tag/v1.12.1 275 echo "Reindexing documents, to allow searching old comments. Required after the 1.12.x upgrade." 276 + ${cfg.package}/bin/paperless-ngx document_index reindex 277 fi 278 279 + echo ${cfg.package.version} > "$versionFile" 280 fi 281 '' 282 + optionalString (cfg.passwordFile != null) '' ··· 286 superuserStateFile="${cfg.dataDir}/superuser-state" 287 288 if [[ $(cat "$superuserStateFile" 2>/dev/null) != $superuserState ]]; then 289 + ${cfg.package}/bin/paperless-ngx manage_superuser 290 echo "$superuserState" > "$superuserStateFile" 291 fi 292 ''; ··· 299 after = [ "paperless-scheduler.service" ]; 300 serviceConfig = defaultServiceConfig // { 301 User = cfg.user; 302 + ExecStart = "${cfg.package}/bin/celery --app paperless worker --loglevel INFO"; 303 Restart = "on-failure"; 304 # The `mbind` syscall is needed for running the classifier. 
305 SystemCallFilter = defaultServiceConfig.SystemCallFilter ++ [ "mbind" ]; ··· 317 after = [ "paperless-scheduler.service" ]; 318 serviceConfig = defaultServiceConfig // { 319 User = cfg.user; 320 + ExecStart = "${cfg.package}/bin/paperless-ngx document_consumer"; 321 Restart = "on-failure"; 322 }; 323 environment = env; ··· 349 echo "PAPERLESS_SECRET_KEY is empty, refusing to start." 350 exit 1 351 fi 352 + exec ${cfg.package.python.pkgs.gunicorn}/bin/gunicorn \ 353 + -c ${cfg.package}/lib/paperless-ngx/gunicorn.conf.py paperless.asgi:application 354 ''; 355 serviceConfig = defaultServiceConfig // { 356 User = cfg.user; ··· 366 CapabilityBoundingSet = [ "CAP_NET_BIND_SERVICE" ]; 367 }; 368 environment = env // { 369 + PYTHONPATH = "${cfg.package.python.pkgs.makePythonPath cfg.package.propagatedBuildInputs}:${cfg.package}/lib/paperless-ngx/src"; 370 }; 371 # Allow the web interface to access the private /tmp directory of the server. 372 # This is required to support uploading files via the web interface.
+6 -3
pkgs/applications/office/paperless-ngx/default.nix
··· 37 # https://github.com/NixOS/nixpkgs/issues/298719 38 # https://github.com/paperless-ngx/paperless-ngx/issues/5494 39 python = python3.override { 40 - packageOverrides = self: super: { 41 - uvicorn = super.uvicorn.overridePythonAttrs (oldAttrs: { 42 version = "0.25.0"; 43 src = fetchFromGitHub { 44 owner = "encode"; ··· 245 doCheck = !stdenv.isDarwin; 246 247 passthru = { 248 - inherit python path frontend; 249 nltkData = with nltk-data; [ punkt snowball_data stopwords ]; 250 tests = { inherit (nixosTests) paperless; }; 251 };
··· 37 # https://github.com/NixOS/nixpkgs/issues/298719 38 # https://github.com/paperless-ngx/paperless-ngx/issues/5494 39 python = python3.override { 40 + packageOverrides = final: prev: { 41 + # tesseract5 may be overwritten in the paperless module and we need to propagate that to make the closure reduction effective 42 + ocrmypdf = prev.ocrmypdf.override { tesseract = tesseract5; }; 43 + 44 + uvicorn = prev.uvicorn.overridePythonAttrs (_: { 45 version = "0.25.0"; 46 src = fetchFromGitHub { 47 owner = "encode"; ··· 248 doCheck = !stdenv.isDarwin; 249 250 passthru = { 251 + inherit python path frontend tesseract5; 252 nltkData = with nltk-data; [ punkt snowball_data stopwords ]; 253 tests = { inherit (nixosTests) paperless; }; 254 };