# Package definition for docling-jobkit, built with buildPythonPackage.
#
# The `with*` flags at the end of the argument set each pull the matching
# `optional-dependencies` group into the runtime `dependencies`; all of them
# default to false, so the default build only carries the core dependencies.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,

  # build-system
  hatchling,
  poetry-core,

  # dependencies
  docling,
  pydantic-settings,
  typer,
  boto3,
  pandas,
  fastparquet,
  pyarrow,
  httpx,

  # optional dependencies
  tesserocr,
  rapidocr-onnxruntime,
  onnxruntime,
  ray,

  # tests
  pytestCheckHook,
  pytest-asyncio,
  writableTmpDirAsHomeHook,

  # options
  withTesserocr ? false,
  withRapidocr ? false,
  withRay ? false,
}:

# `rec` is required: `version` is used by `src.tag`, `src` by `meta.changelog`,
# and `optional-dependencies` by `dependencies`.
buildPythonPackage rec {
  pname = "docling-jobkit";
  version = "1.1.1";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "docling-project";
    repo = "docling-jobkit";
    tag = "v${version}";
    hash = "sha256-Q4RCA/gJxyfOfzuRnuCmndVEeV0JUCTU389KSEv7vVk=";
  };

  build-system = [
    hatchling
    poetry-core
  ];

  # Core runtime dependencies, extended with whichever optional groups were
  # enabled through the `with*` arguments.
  dependencies = [
    docling
    pydantic-settings
    typer
    boto3
    pandas
    fastparquet
    pyarrow
    httpx
  ]
  ++ lib.optionals withTesserocr optional-dependencies.tesserocr
  ++ lib.optionals withRapidocr optional-dependencies.rapidocr
  ++ lib.optionals withRay optional-dependencies.ray;

  # Extras groups; also consumed by `dependencies` above.
  optional-dependencies = {
    tesserocr = [ tesserocr ];
    rapidocr = [
      rapidocr-onnxruntime
      onnxruntime
    ];
    ray = [ ray ];
  };

  # Drop upstream's version constraints on these packages.
  # NOTE(review): presumably the upstream pins do not match the versions
  # shipped in nixpkgs — confirm when bumping `version`.
  pythonRelaxDeps = [
    "boto3"
    "pyarrow"
  ];

  pythonImportsCheck = [
    "docling"
    "docling_jobkit"
  ];

  nativeCheckInputs = [
    pytestCheckHook
    pytest-asyncio
    # NOTE(review): presumably the tests write under $HOME — confirm.
    writableTmpDirAsHomeHook
  ];

  disabledTests = [
    # requires network access
    "test_convert_url"
    "test_convert_file"
    "test_convert_warmup"

    # Flaky due to comparison with magic object
    # https://github.com/docling-project/docling-jobkit/issues/45
    "test_options_validator"
  ];

  meta = {
    changelog = "https://github.com/docling-project/docling-jobkit/blob/${src.tag}/CHANGELOG.md";
    description = "Running a distributed job processing documents with Docling";
    homepage = "https://github.com/docling-project/docling-jobkit";
    license = lib.licenses.mit;
    maintainers = with lib.maintainers; [ codgician ];
  };
}