# Package definition for tabula-py: a module to extract tables from PDF files
# into pandas DataFrames.
{
  lib,
  buildPythonPackage,
  distro,
  fetchFromGitHub,
  jre,
  numpy,
  pandas,
  pytestCheckHook,
  pythonOlder,
  setuptools,
  setuptools-scm,
  jpype1,
}:

buildPythonPackage rec {
  pname = "tabula-py";
  version = "2.8.2";
  format = "pyproject";

  # Marked as disabled on Python interpreters older than 3.8.
  disabled = pythonOlder "3.8";

  # Source is fetched from the upstream GitHub repository at the release tag.
  src = fetchFromGitHub {
    owner = "chezou";
    repo = "tabula-py";
    rev = "refs/tags/v${version}";
    hash = "sha256-Zrq1i+HYXXNulyZ/fv00AgVd7ODj3rP9orLq5rT3ERU=";
  };

  # Swap the quoted "java" literal in tabula/backend.py for the executable
  # path that lib.getExe reports for the jre argument.
  postPatch = ''
    substituteInPlace tabula/backend.py \
    --replace '"java"' '"${lib.getExe jre}"'
  '';

  # Hand the package version to setuptools-scm explicitly.
  # NOTE(review): presumably needed because the fetched source carries no
  # git metadata for setuptools-scm to read -- confirm.
  SETUPTOOLS_SCM_PRETEND_VERSION = version;

  nativeBuildInputs = [
    setuptools
    setuptools-scm
  ];

  buildInputs = [ jre ];

  propagatedBuildInputs = [
    distro
    numpy
    pandas
    jpype1
  ];

  nativeCheckInputs = [ pytestCheckHook ];

  pythonImportsCheck = [ "tabula" ];

  disabledTests = [
    # These tests need network access.
    "test_convert_remote_file"
    "test_read_pdf_with_remote_template"
    "test_read_remote_pdf"
    "test_read_remote_pdf_with_custom_user_agent"
    # It is unclear what this test verifies. It is probably tied to jpype,
    # whereas subprocess is used here instead; see
    # https://github.com/chezou/tabula-py/issues/352#issuecomment-1730791540
    # Observed failure: "Failed: DID NOT RAISE <class 'RuntimeError'>"
    "test_read_pdf_with_silent_true"
  ];

  meta = {
    description = "Module to extract table from PDF into pandas DataFrame";
    homepage = "https://github.com/chezou/tabula-py";
    changelog = "https://github.com/chezou/tabula-py/releases/tag/v${version}";
    license = lib.licenses.mit;
    maintainers = [ lib.maintainers.fab ];
  };
}