pkgs/development/python-modules/paddleocr/default.nix at 23.11-beta · pyrox.dev/nixpkgs

pyrox.dev / nixpkgs
lol
nixpkgs / pkgs / development / python-modules / paddleocr / default.nix
at 23.11-beta 2.5 kB view raw
  # Nix package expression for PaddleOCR: a function taking its Python
  # dependencies as arguments and returning a `buildPythonPackage` derivation
  # built from the upstream GitHub tag `v2.7.1`.
  1{ lib
  2, buildPythonPackage
  3, pythonRelaxDepsHook
  4, fetchFromGitHub
  5, attrdict
  6, beautifulsoup4
  7, cython
  8, fire
  9, fonttools
 10, lmdb
 11, lxml
 12, numpy
 13, opencv4
 14, openpyxl
 15, pdf2docx
 16, pillow
 17, premailer
 18, pyclipper
 19, pymupdf
 20, python-docx
 21, rapidfuzz
 22, scikit-image
 23, shapely
 24, tqdm
 25, paddlepaddle
 26, lanms-neo
 27, polygon3
 28}:
 29
     # `version` is bound in a `let` so it can be reused for the source tag
     # (`rev`) and the changelog URL in `meta`.
 30let
 31  version = "2.7.1";
 32in
 33buildPythonPackage {
 34  pname = "paddleocr";
 35  inherit version;
 36  format = "setuptools";
 37
 38  src = fetchFromGitHub {
 39    owner = "PaddlePaddle";
 40    repo = "PaddleOCR";
 41    rev = "v${version}";
 42    hash = "sha256-5Dt4UL+7dwJNjcNnCVi3o8bLCt7/m/M6oh1vPu9rza8=";
 43  };
 44
 45  patches = [
 46    # The `ppocr.data.imaug` module re-exports the `IaaAugment` and `CopyPaste`
 47    # classes. These classes depend on the `imgaug` package which is
 48    # unmaintained and has been removed from nixpkgs.
 49    #
 50    # The image OCR feature of PaddleOCR doesn't use these classes though, so
 51    # it still works after stripping the `IaaAugment` and `CopyPaste`
 52    # exports. It probably breaks some of the OCR model creation tooling that
 53    # PaddleOCR provides, however.
 54    ./remove-import-imaug.patch
 55  ];
 56
 57  nativeBuildInputs = [ pythonRelaxDepsHook ];
 58  # Relax every dependency: trying to relax only pymupdf makes the whole build fail.
 59  pythonRelaxDeps = true;
     # Requirement names dropped from the package's declared dependencies.
     # `imgaug` is gone from nixpkgs (see the patch note above).
     # NOTE(review): presumably `opencv4` below stands in for the two opencv
     # wheels, and `visualdl` is simply not provided here — confirm.
 60  pythonRemoveDeps = [
 61    "imgaug"
 62    "visualdl"
 63    "opencv-python"
 64    "opencv-contrib-python"
 65  ];
 66
 67  propagatedBuildInputs = [
 68    attrdict
 69    beautifulsoup4
 70    cython
 71    fire
 72    fonttools
 73    lmdb
 74    lxml
 75    numpy
 76    opencv4
 77    openpyxl
 78    pdf2docx
 79    pillow
 80    premailer
 81    pyclipper
 82    pymupdf
 83    python-docx
 84    rapidfuzz
 85    scikit-image
 86    shapely
 87    tqdm
 88    paddlepaddle
 89    lanms-neo
 90    polygon3
 91  ];
 92
 93  # TODO: The tests depend, among possibly other things, on `cudatoolkit`.
 94  # But cudatoolkit fails to install, so the check phase is disabled below.
     # NOTE: `pkgs` is not among this function's arguments, so re-enabling the
     # commented-out lines also requires bringing `which` and `cudatoolkit`
     # into scope.
 95  # preCheck = "export HOME=$TMPDIR";
 96  # nativeCheckInputs = with pkgs; [ which cudatoolkit ];
 97  doCheck = false;
 98
 99  meta = with lib; {
100    homepage = "https://github.com/PaddlePaddle/PaddleOCR";
101    license = licenses.asl20;
102    description = "Multilingual OCR toolkits based on PaddlePaddle";
103    longDescription = ''
104      PaddleOCR aims to create multilingual, awesome, leading, and practical OCR
105      tools that help users train better models and apply them into practice.
106    '';
107    changelog = "https://github.com/PaddlePaddle/PaddleOCR/releases/tag/v${version}";
108    maintainers = with maintainers; [ happysalada ];
109    platforms = [ "x86_64-linux" "x86_64-darwin" "aarch64-darwin" ];
110  };
111}