pkgs/development/python-modules/paddleocr/default.nix at 24.11-pre · pyrox.dev/nixpkgs

pyrox.dev / nixpkgs
lol
nixpkgs / pkgs / development / python-modules / paddleocr / default.nix
at 24.11-pre 2.6 kB view raw
  1{
  2  lib,
  3  buildPythonPackage,
  4  pythonRelaxDepsHook,
  5  fetchFromGitHub,
  6  attrdict,
  7  beautifulsoup4,
  8  cython,
  9  fire,
 10  fonttools,
 11  lmdb,
 12  lxml,
 13  numpy,
 14  opencv4,
 15  openpyxl,
 16  pdf2docx,
 17  pillow,
 18  premailer,
 19  pyclipper,
 20  pymupdf,
 21  python-docx,
 22  rapidfuzz,
 23  scikit-image,
 24  shapely,
 25  tqdm,
 26  paddlepaddle,
 27  lanms-neo,
 28  polygon3,
 29}:
 30
 31let
 32  version = "2.7.1";
 33in
 34buildPythonPackage {
 35  pname = "paddleocr";
 36  inherit version;
 37  format = "setuptools";
 38
 39  src = fetchFromGitHub {
 40    owner = "PaddlePaddle";
 41    repo = "PaddleOCR";
 42    rev = "v${version}";
 43    hash = "sha256-5Dt4UL+7dwJNjcNnCVi3o8bLCt7/m/M6oh1vPu9rza8=";
 44  };
 45
 46  patches = [
 47    # The `ppocr.data.imaug` re-exports the `IaaAugment` and `CopyPaste`
 48    # classes. These classes depend on the `imgaug` package which is
 49    # unmaintained and has been removed from nixpkgs.
 50    #
 51    # The image OCR feature of PaddleOCR doesn't use these classes though, so
 52    # they work even after stripping the the `IaaAugment` and `CopyPaste`
 53    # exports. It probably breaks some of the OCR model creation tooling that
 54    # PaddleOCR provides, however.
 55    ./remove-import-imaug.patch
 56  ];
 57
 58  nativeBuildInputs = [ pythonRelaxDepsHook ];
 59  # trying to relax only pymupdf makes the whole build fail
 60  pythonRelaxDeps = true;
 61  pythonRemoveDeps = [
 62    "imgaug"
 63    "visualdl"
 64    "opencv-python"
 65    "opencv-contrib-python"
 66  ];
 67
 68  propagatedBuildInputs = [
 69    attrdict
 70    beautifulsoup4
 71    cython
 72    fire
 73    fonttools
 74    lmdb
 75    lxml
 76    numpy
 77    opencv4
 78    openpyxl
 79    pdf2docx
 80    pillow
 81    premailer
 82    pyclipper
 83    pymupdf
 84    python-docx
 85    rapidfuzz
 86    scikit-image
 87    shapely
 88    tqdm
 89    paddlepaddle
 90    lanms-neo
 91    polygon3
 92  ];
 93
 94  # TODO: The tests depend, among possibly other things, on `cudatoolkit`.
 95  # But Cudatoolkit fails to install.
 96  # preCheck = "export HOME=$TMPDIR";
 97  # nativeCheckInputs = with pkgs; [ which cudatoolkit ];
 98  doCheck = false;
 99
100  meta = with lib; {
101    homepage = "https://github.com/PaddlePaddle/PaddleOCR";
102    license = licenses.asl20;
103    description = "Multilingual OCR toolkits based on PaddlePaddle";
104    longDescription = ''
105      PaddleOCR aims to create multilingual, awesome, leading, and practical OCR
106      tools that help users train better models and apply them into practice.
107    '';
108    changelog = "https://github.com/PaddlePaddle/PaddleOCR/releases/tag/v${version}";
109    maintainers = with maintainers; [ happysalada ];
110    platforms = [
111      "x86_64-linux"
112      "x86_64-darwin"
113      "aarch64-darwin"
114    ];
115  };
116}