# Package definition for the `docling-parse` Python module, built from the
# DS4SD/docling-parse GitHub repository with `buildPythonPackage`.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,

  # build-system
  poetry-core,

  # nativeBuildInputs
  cmake,
  pkg-config,

  # buildInputs
  pybind11,
  cxxopts,
  libjpeg,
  loguru-cpp,
  nlohmann_json,
  qpdf,
  utf8cpp,
  zlib,

  # dependencies
  tabulate,

  # nativeCheckInputs
  pytestCheckHook,
}:

let
  version = "2.0.3";

  # Sources are pinned to the upstream release tag `v<version>`.
  src = fetchFromGitHub {
    owner = "DS4SD";
    repo = "docling-parse";
    rev = "refs/tags/v${version}";
    hash = "sha256-pZJ7lneg4ftAoWS5AOflkkKCwZGF4TJIuqDjq4W4VBw=";
  };
in
buildPythonPackage {
  pname = "docling-parse";
  inherit version src;
  pyproject = true;

  build-system = [ poetry-core ];

  nativeBuildInputs = [
    cmake
    pkg-config
  ];

  # Keep the cmake setup hook from replacing the configure phase.
  # NOTE(review): presumably the Python build backend drives cmake itself —
  # confirm against the upstream build script.
  dontUseCmakeConfigure = true;

  # NOTE(review): these flags are consumed by the cmake configure hook, which
  # is disabled above, so they look inert here — confirm before relying on them.
  cmakeFlags = [ "-DUSE_SYSTEM_DEPS=True" ];

  env = {
    # Add the utf8cpp header directory to the compiler's include search path.
    NIX_CFLAGS_COMPILE = "-I${lib.getDev utf8cpp}/include/utf8cpp";

    # Exported into the build environment.
    # NOTE(review): presumably read by the upstream build to use the
    # libraries from buildInputs instead of bundled copies — confirm.
    USE_SYSTEM_DEPS = true;
  };

  buildInputs = [
    pybind11
    cxxopts
    libjpeg
    loguru-cpp
    nlohmann_json
    qpdf
    utf8cpp
    zlib
  ];

  dependencies = [ tabulate ];

  nativeCheckInputs = [ pytestCheckHook ];

  pythonImportsCheck = [ "docling_parse" ];

  meta = {
    description = "Simple package to extract text with coordinates from programmatic PDFs";
    homepage = "https://github.com/DS4SD/docling-parse";
    changelog = "https://github.com/DS4SD/docling-parse/blob/${src.rev}/CHANGELOG.md";
    license = lib.licenses.mit;
    maintainers = [ lib.maintainers.drupol ];
  };
}