1{ 2 lib, 3 buildPythonPackage, 4 fetchFromGitHub, 5 cmake, 6 pkg-config, 7 cxxopts, 8 poetry-core, 9 pybind11, 10 zlib, 11 nlohmann_json, 12 utf8cpp, 13 libjpeg, 14 qpdf, 15 loguru-cpp, 16 # python dependencies 17 tabulate, 18 pillow, 19 pydantic, 20 docling-core, 21 pytestCheckHook, 22}: 23 24buildPythonPackage rec { 25 pname = "docling-parse"; 26 version = "4.0.1"; 27 pyproject = true; 28 29 src = fetchFromGitHub { 30 owner = "docling-project"; 31 repo = "docling-parse"; 32 tag = "v${version}"; 33 hash = "sha256-Po+0IepSTy4ChJVjjTuTZqt8/iLoSJK63bTkH+Xgtl8="; 34 }; 35 36 dontUseCmakeConfigure = true; 37 38 nativeBuildInputs = [ 39 cmake 40 pkg-config 41 ]; 42 43 build-system = [ 44 poetry-core 45 ]; 46 47 env.NIX_CFLAGS_COMPILE = "-I${lib.getDev utf8cpp}/include/utf8cpp"; 48 49 buildInputs = [ 50 pybind11 51 cxxopts 52 libjpeg 53 loguru-cpp 54 nlohmann_json 55 qpdf 56 utf8cpp 57 zlib 58 ]; 59 60 env.USE_SYSTEM_DEPS = true; 61 62 cmakeFlags = [ 63 "-DUSE_SYSTEM_DEPS=True" 64 ]; 65 66 dependencies = [ 67 tabulate 68 pillow 69 pydantic 70 docling-core 71 ]; 72 73 pythonRelaxDeps = [ 74 "pydantic" 75 "pillow" 76 ]; 77 78 pythonImportsCheck = [ 79 "docling_parse" 80 ]; 81 82 nativeCheckInputs = [ 83 pytestCheckHook 84 ]; 85 86 meta = { 87 changelog = "https://github.com/DS4SD/docling-parse/blob/${src.tag}/CHANGELOG.md"; 88 description = "Simple package to extract text with coordinates from programmatic PDFs"; 89 homepage = "https://github.com/DS4SD/docling-parse"; 90 license = lib.licenses.mit; 91 maintainers = with lib.maintainers; [ drupol ]; 92 }; 93}