Clone of https://github.com/NixOS/nixpkgs.git (to stress-test knotserver)
# Package expression for docling-parse: a Python package with a C++ extension
# (built against pybind11) that extracts text with coordinates from PDFs.
#
# Arguments are supplied by callPackage: `lib`, the Python builder and fetcher,
# the native build tools, the C/C++ libraries the extension links against, and
# the Python runtime/test dependencies.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  cmake,
  pkg-config,
  cxxopts,
  poetry-core,
  pybind11,
  zlib,
  nlohmann_json,
  utf8cpp,
  libjpeg,
  qpdf,
  loguru-cpp,
  # python dependencies
  tabulate,
  pillow,
  pydantic,
  docling-core,
  pytestCheckHook,
}:

buildPythonPackage rec {
  pname = "docling-parse";
  version = "4.1.0";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "docling-project";
    repo = "docling-parse";
    tag = "v${version}";
    hash = "sha256-1vl5Ij25NXAwhoXLJ35lcr5r479jrdKd9DxWhYbCApw=";
  };

  patches = [
    # Fixes test_parse unit tests
    # export_to_textlines in docling-core >= 2.38.2 includes text direction
    # by default, which is not included in upstream's groundtruth data.
    # TODO: remove when docling-core version gets bumped in upstream's uv.lock
    ./test_parse.patch
  ];

  # cmake is listed in nativeBuildInputs, but its configure hook is disabled.
  # NOTE(review): presumably the Python build backend invokes CMake itself --
  # confirm against upstream's build script.
  dontUseCmakeConfigure = true;

  nativeBuildInputs = [
    cmake
    pkg-config
  ];

  build-system = [
    poetry-core
  ];

  # Put utf8cpp's nested include directory on the compiler search path.
  env.NIX_CFLAGS_COMPILE = "-I${lib.getDev utf8cpp}/include/utf8cpp";

  buildInputs = [
    pybind11
    cxxopts
    libjpeg
    loguru-cpp
    nlohmann_json
    qpdf
    utf8cpp
    zlib
  ];

  # USE_SYSTEM_DEPS is passed both as an environment variable and as a CMake
  # flag.
  # NOTE(review): presumably this makes upstream's build use the libraries in
  # buildInputs instead of bundled/downloaded copies -- confirm upstream.
  env.USE_SYSTEM_DEPS = true;

  cmakeFlags = [
    "-DUSE_SYSTEM_DEPS=True"
  ];

  dependencies = [
    tabulate
    pillow
    pydantic
    docling-core
  ];

  # Loosen upstream's version constraints on these two dependencies.
  pythonRelaxDeps = [
    "pydantic"
    "pillow"
  ];

  pythonImportsCheck = [
    "docling_parse"
  ];

  nativeCheckInputs = [
    pytestCheckHook
  ];

  meta = {
    # Use the same GitHub organisation as `src` (docling-project) so the
    # changelog and homepage links match the repository that is fetched.
    changelog = "https://github.com/docling-project/docling-parse/blob/${src.tag}/CHANGELOG.md";
    description = "Simple package to extract text with coordinates from programmatic PDFs";
    homepage = "https://github.com/docling-project/docling-parse";
    license = lib.licenses.mit;
    maintainers = with lib.maintainers; [ drupol ];
  };
}