pkgs/development/python-modules/paddleocr/default.nix at 25.11-pre · pyrox.dev/nixpkgs

pyrox.dev / nixpkgs
lol
nixpkgs / pkgs / development / python-modules / paddleocr / default.nix
at 25.11-pre 2.5 kB view raw
  # Package expression for PaddleOCR. The source is fetched from the upstream
  # GitHub tag and built with `buildPythonPackage` in setuptools format.
  # Every argument below is a package (or `lib`) expected to be supplied by
  # the caller, e.g. via `callPackage`.
  {
    lib,
    buildPythonPackage,
    fetchFromGitHub,
    attrdict,
    beautifulsoup4,
    cython,
    fire,
    fonttools,
    lmdb,
    lxml,
    numpy,
    opencv-python,
    openpyxl,
    pdf2docx,
    pillow,
    pyclipper,
    pymupdf,
    python-docx,
    rapidfuzz,
    scikit-image,
    shapely,
    tqdm,
    paddlepaddle,
    lanms-neo,
    polygon3,
  }:

  # `rec` is required: `src` refers to `version`, and `meta.changelog` refers
  # to `src.tag`.
  buildPythonPackage rec {
    pname = "paddleocr";
    version = "2.9.1";
    format = "setuptools";

    src = fetchFromGitHub {
      owner = "PaddlePaddle";
      repo = "PaddleOCR";
      tag = "v${version}";
      hash = "sha256-QCddxgVdLaAJLfKCy+tnQsxownfl1Uv0TXhFRiFi9cY=";
    };

    patches = [
      # The `ppocr.data.imaug` module re-exports the `IaaAugment` and
      # `CopyPaste` classes. These classes depend on the `imgaug` package,
      # which is unmaintained and has been removed from nixpkgs.
      #
      # The image OCR feature of PaddleOCR doesn't use these classes, so it
      # keeps working after stripping the `IaaAugment` and `CopyPaste`
      # exports. It probably breaks some of the OCR model creation tooling
      # that PaddleOCR provides, however.
      ./remove-import-imaug.patch
    ];

    # Relax every version constraint: trying to relax only pymupdf makes the
    # whole build fail.
    pythonRelaxDeps = true;
    # Requirements dropped from the package's declared dependencies. `imgaug`
    # is the package whose imports are patched out above.
    pythonRemoveDeps = [
      "imgaug"
      "visualdl"
      "opencv-contrib-python"
    ];

    propagatedBuildInputs = [
      attrdict
      beautifulsoup4
      cython
      fire
      fonttools
      lmdb
      lxml
      numpy
      opencv-python
      openpyxl
      pdf2docx
      pillow
      pyclipper
      pymupdf
      python-docx
      rapidfuzz
      scikit-image
      shapely
      tqdm
      paddlepaddle
      lanms-neo
      polygon3
    ];

    # TODO: The tests depend, among possibly other things, on `cudatoolkit`,
    # but cudatoolkit fails to install. If tests are re-enabled, note that
    # `pkgs` is not an argument of this expression; `which` and `cudatoolkit`
    # would have to be added to the argument set first.
    # preCheck = "export HOME=$TMPDIR";
    # nativeCheckInputs = [ which cudatoolkit ];
    doCheck = false;

    # Attributes are referenced through `lib.` explicitly rather than opening
    # the whole of `lib` over the `meta` set.
    meta = {
      homepage = "https://github.com/PaddlePaddle/PaddleOCR";
      license = lib.licenses.asl20;
      description = "Multilingual OCR toolkits based on PaddlePaddle";
      longDescription = ''
        PaddleOCR aims to create multilingual, awesome, leading, and practical OCR
        tools that help users train better models and apply them into practice.
      '';
      changelog = "https://github.com/PaddlePaddle/PaddleOCR/releases/tag/${src.tag}";
      maintainers = with lib.maintainers; [ happysalada ];
      platforms = [
        "x86_64-linux"
        "x86_64-darwin"
        "aarch64-darwin"
      ];
    };
  }