# Package expression for PaddleOCR, built with `buildPythonPackage` from the
# upstream GitHub release tag.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  attrdict,
  beautifulsoup4,
  cython,
  fire,
  fonttools,
  lmdb,
  lxml,
  numpy,
  opencv-python,
  openpyxl,
  pdf2docx,
  pillow,
  pyclipper,
  pymupdf,
  python-docx,
  rapidfuzz,
  scikit-image,
  shapely,
  tqdm,
  paddlepaddle,
  lanms-neo,
  polygon3,
}:

let
  version = "2.9.1";
in
buildPythonPackage rec {
  pname = "paddleocr";
  inherit version;
  format = "setuptools";

  src = fetchFromGitHub {
    owner = "PaddlePaddle";
    repo = "PaddleOCR";
    tag = "v${version}";
    hash = "sha256-QCddxgVdLaAJLfKCy+tnQsxownfl1Uv0TXhFRiFi9cY=";
  };

  patches = [
    # `ppocr.data.imaug` re-exports the `IaaAugment` and `CopyPaste` classes.
    # These classes depend on the `imgaug` package, which is unmaintained and
    # has been removed from nixpkgs.
    #
    # The image OCR feature of PaddleOCR doesn't use these classes though, so
    # it keeps working after stripping the `IaaAugment` and `CopyPaste`
    # exports. It probably breaks some of the OCR model creation tooling that
    # PaddleOCR provides, however.
    ./remove-import-imaug.patch
  ];

  # Relax every version constraint: trying to relax only pymupdf makes the
  # whole build fail.
  pythonRelaxDeps = true;
  # Requirements dropped from the package metadata; none of them is listed in
  # the dependencies below.
  pythonRemoveDeps = [
    "imgaug"
    "visualdl"
    "opencv-contrib-python"
  ];

  propagatedBuildInputs = [
    attrdict
    beautifulsoup4
    cython
    fire
    fonttools
    lmdb
    lxml
    numpy
    opencv-python
    openpyxl
    pdf2docx
    pillow
    pyclipper
    pymupdf
    python-docx
    rapidfuzz
    scikit-image
    shapely
    tqdm
    paddlepaddle
    lanms-neo
    polygon3
  ];

  # TODO: The tests depend, among possibly other things, on `cudatoolkit`,
  # but cudatoolkit fails to install, so the check phase stays disabled.
  # preCheck = "export HOME=$TMPDIR";
  # nativeCheckInputs = with pkgs; [ which cudatoolkit ];
  doCheck = false;

  meta = {
    homepage = "https://github.com/PaddlePaddle/PaddleOCR";
    license = lib.licenses.asl20;
    description = "Multilingual OCR toolkits based on PaddlePaddle";
    longDescription = ''
      PaddleOCR aims to create multilingual, awesome, leading, and practical OCR
      tools that help users train better models and apply them into practice.
    '';
    # `src.tag` resolves through the enclosing `rec` set, so the changelog link
    # always follows the fetched tag.
    changelog = "https://github.com/PaddlePaddle/PaddleOCR/releases/tag/${src.tag}";
    maintainers = with lib.maintainers; [ happysalada ];
    platforms = [
      "x86_64-linux"
      "x86_64-darwin"
      "aarch64-darwin"
    ];
  };
}