# PaddleOCR packaged as a Python module, built from the upstream GitHub
# release tag `v${version}`.
{
  lib,
  buildPythonPackage,
  pythonRelaxDepsHook,
  fetchFromGitHub,
  attrdict,
  beautifulsoup4,
  cython,
  fire,
  fonttools,
  lmdb,
  lxml,
  numpy,
  opencv4,
  openpyxl,
  pdf2docx,
  pillow,
  premailer,
  pyclipper,
  pymupdf,
  python-docx,
  rapidfuzz,
  scikit-image,
  shapely,
  tqdm,
  paddlepaddle,
  lanms-neo,
  polygon3,
}:

let
  # Single source of truth for the release: reused by `src.rev` and
  # `meta.changelog` below.
  version = "2.7.1";
in
buildPythonPackage {
  pname = "paddleocr";
  inherit version;
  format = "setuptools";

  src = fetchFromGitHub {
    owner = "PaddlePaddle";
    repo = "PaddleOCR";
    rev = "v${version}";
    hash = "sha256-5Dt4UL+7dwJNjcNnCVi3o8bLCt7/m/M6oh1vPu9rza8=";
  };

  patches = [
    # The `ppocr.data.imaug` module re-exports the `IaaAugment` and
    # `CopyPaste` classes. These classes depend on the `imgaug` package, which
    # is unmaintained and has been removed from nixpkgs.
    #
    # The image OCR feature of PaddleOCR doesn't use these classes though, so
    # it keeps working even after stripping the `IaaAugment` and `CopyPaste`
    # exports. It probably breaks some of the OCR model creation tooling that
    # PaddleOCR provides, however.
    ./remove-import-imaug.patch
  ];

  nativeBuildInputs = [ pythonRelaxDepsHook ];

  # Relax the version constraint of every dependency: trying to relax only
  # pymupdf makes the whole build fail.
  pythonRelaxDeps = true;

  # Requirements dropped from the package metadata altogether.
  # `imgaug` is no longer available in nixpkgs (see the patch above).
  # NOTE(review): the two opencv entries are presumably satisfied by the
  # `opencv4` input below, and `visualdl` is presumably not needed at
  # runtime -- confirm.
  pythonRemoveDeps = [
    "imgaug"
    "visualdl"
    "opencv-python"
    "opencv-contrib-python"
  ];

  propagatedBuildInputs = [
    attrdict
    beautifulsoup4
    cython
    fire
    fonttools
    lmdb
    lxml
    numpy
    opencv4
    openpyxl
    pdf2docx
    pillow
    premailer
    pyclipper
    pymupdf
    python-docx
    rapidfuzz
    scikit-image
    shapely
    tqdm
    paddlepaddle
    lanms-neo
    polygon3
  ];

  # TODO: The tests depend, among possibly other things, on `cudatoolkit`.
  # But Cudatoolkit fails to install.
  # When re-enabling, note that `pkgs` is not an argument of this expression:
  # `which` and `cudatoolkit` would have to be added to the argument set
  # instead of being pulled in through `with pkgs;`.
  # preCheck = "export HOME=$TMPDIR";
  # nativeCheckInputs = with pkgs; [ which cudatoolkit ];
  doCheck = false;

  # `lib` is referenced explicitly rather than through `with lib;` so that no
  # attribute in this set can be silently shadowed by a name from `lib`.
  meta = {
    homepage = "https://github.com/PaddlePaddle/PaddleOCR";
    license = lib.licenses.asl20;
    description = "Multilingual OCR toolkits based on PaddlePaddle";
    longDescription = ''
      PaddleOCR aims to create multilingual, awesome, leading, and practical OCR
      tools that help users train better models and apply them into practice.
    '';
    changelog = "https://github.com/PaddlePaddle/PaddleOCR/releases/tag/v${version}";
    maintainers = with lib.maintainers; [ happysalada ];
    platforms = [
      "x86_64-linux"
      "x86_64-darwin"
      "aarch64-darwin"
    ];
  };
}