# Package definition for PaddleOCR, built from the upstream GitHub release tag
# with `buildPythonPackage`. Every argument below is expected to be supplied
# by the caller (typically via `callPackage` from the Python package set).
{
  lib,
  buildPythonPackage,
  pythonRelaxDepsHook,
  fetchFromGitHub,
  attrdict,
  beautifulsoup4,
  cython,
  fire,
  fonttools,
  lmdb,
  lxml,
  numpy,
  opencv4,
  openpyxl,
  pdf2docx,
  pillow,
  premailer,
  pyclipper,
  pymupdf,
  python-docx,
  rapidfuzz,
  scikit-image,
  shapely,
  tqdm,
  paddlepaddle,
  lanms-neo,
  polygon3,
}:

let
  # Single source of truth for the release: reused for the source tag (`rev`)
  # and for the changelog URL in `meta`.
  version = "2.7.1";
in
buildPythonPackage {
  pname = "paddleocr";
  inherit version;
  format = "setuptools";

  src = fetchFromGitHub {
    owner = "PaddlePaddle";
    repo = "PaddleOCR";
    rev = "v${version}";
    hash = "sha256-5Dt4UL+7dwJNjcNnCVi3o8bLCt7/m/M6oh1vPu9rza8=";
  };

  patches = [
    # The `ppocr.data.imaug` module re-exports the `IaaAugment` and
    # `CopyPaste` classes. These classes depend on the `imgaug` package, which
    # is unmaintained and has been removed from nixpkgs.
    #
    # The image OCR feature of PaddleOCR doesn't use these classes though, so
    # it keeps working even after stripping the `IaaAugment` and `CopyPaste`
    # exports. It probably breaks some of the OCR model creation tooling that
    # PaddleOCR provides, however.
    ./remove-import-imaug.patch
  ];

  nativeBuildInputs = [ pythonRelaxDepsHook ];
  # trying to relax only pymupdf makes the whole build fail
  pythonRelaxDeps = true;
  # Requirements stripped from the package metadata. `imgaug` is the package
  # the patch above works around.
  # NOTE(review): presumably `opencv4` stands in for the two opencv-* wheels
  # and `visualdl` is not needed at runtime; confirm before changing.
  pythonRemoveDeps = [
    "imgaug"
    "visualdl"
    "opencv-python"
    "opencv-contrib-python"
  ];

  propagatedBuildInputs = [
    attrdict
    beautifulsoup4
    cython
    fire
    fonttools
    lmdb
    lxml
    numpy
    opencv4
    openpyxl
    pdf2docx
    pillow
    premailer
    pyclipper
    pymupdf
    python-docx
    rapidfuzz
    scikit-image
    shapely
    tqdm
    paddlepaddle
    lanms-neo
    polygon3
  ];

  # TODO: The tests depend, among possibly other things, on `cudatoolkit`.
  # But cudatoolkit fails to install.
  # NOTE: `pkgs` is not an argument of this expression, so the commented-out
  # inputs below would have to be added to the argument list to be re-enabled.
  # preCheck = "export HOME=$TMPDIR";
  # nativeCheckInputs = with pkgs; [ which cudatoolkit ];
  doCheck = false;

  # `lib` is referenced explicitly rather than through `with lib;`, so every
  # name used here has a visible origin.
  meta = {
    homepage = "https://github.com/PaddlePaddle/PaddleOCR";
    license = lib.licenses.asl20;
    description = "Multilingual OCR toolkits based on PaddlePaddle";
    longDescription = ''
      PaddleOCR aims to create multilingual, awesome, leading, and practical OCR
      tools that help users train better models and apply them into practice.
    '';
    changelog = "https://github.com/PaddlePaddle/PaddleOCR/releases/tag/v${version}";
    maintainers = with lib.maintainers; [ happysalada ];
    platforms = [
      "x86_64-linux"
      "x86_64-darwin"
      "aarch64-darwin"
    ];
  };
}