# Nix package expression for the `docling-parse` Python package.
#
# The native extension is compiled against libraries supplied through
# `buildInputs` (USE_SYSTEM_DEPS is set in the build environment below).
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  cmake,
  pkg-config,
  cxxopts,
  setuptools,
  pybind11,
  zlib,
  nlohmann_json,
  utf8cpp,
  libjpeg,
  qpdf,
  loguru-cpp,
  # python dependencies
  tabulate,
  pillow,
  pydantic,
  docling-core,
  pytestCheckHook,
}:

buildPythonPackage rec {
  pname = "docling-parse";
  version = "4.5.0";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "docling-project";
    repo = "docling-parse";
    tag = "v${version}";
    hash = "sha256-8eHYMvfjPuGgrgrlqEh061ug+yer+1nQLbeDR1dQu68=";
  };

  # Drop the `<4.0.0` upper bound on cmake declared in pyproject.toml.
  postPatch = ''
    substituteInPlace pyproject.toml \
      --replace-fail \
        '"cmake>=3.27.0,<4.0.0"' \
        '"cmake>=3.27.0"'
  '';

  # cmake is only needed as a tool on PATH; its configure hook must not run.
  # NOTE(review): presumably the Python build backend invokes cmake itself; confirm.
  dontUseCmakeConfigure = true;

  nativeBuildInputs = [
    cmake
    pkg-config
  ];

  build-system = [
    setuptools
  ];

  env = {
    # Add utf8cpp's `include/utf8cpp` subdirectory to the compiler search path.
    NIX_CFLAGS_COMPILE = "-I${lib.getDev utf8cpp}/include/utf8cpp";
    USE_SYSTEM_DEPS = true;
  };

  buildInputs = [
    pybind11
    cxxopts
    libjpeg
    loguru-cpp
    nlohmann_json
    qpdf
    utf8cpp
    zlib
  ];

  # NOTE(review): with dontUseCmakeConfigure = true the cmake configure hook
  # does not run, so these flags may never be read; confirm whether this is
  # still required alongside env.USE_SYSTEM_DEPS.
  cmakeFlags = [
    "-DUSE_SYSTEM_DEPS=True"
  ];

  dependencies = [
    tabulate
    pillow
    pydantic
    docling-core
  ];

  pythonRelaxDeps = [
    "pydantic"
    "pillow"
  ];

  # Listed as runtime dependencies but only used in CI to build wheels
  preBuild = ''
    sed -i '/cibuildwheel/d' pyproject.toml
    sed -i '/delocate/d' pyproject.toml
  '';

  pythonImportsCheck = [
    "docling_parse"
  ];

  nativeCheckInputs = [
    pytestCheckHook
  ];

  meta = {
    # URLs use the same GitHub organisation as `src` above.
    changelog = "https://github.com/docling-project/docling-parse/blob/${src.tag}/CHANGELOG.md";
    description = "Simple package to extract text with coordinates from programmatic PDFs";
    homepage = "https://github.com/docling-project/docling-parse";
    license = lib.licenses.mit;
    maintainers = [ ];
  };
}