nixpkgs mirror (for testing)
github.com/NixOS/nixpkgs
nix
# Package expression for tabula-py: a module to extract tables from PDF files
# into pandas DataFrames. Built from the upstream GitHub release tag.
{
  buildPythonPackage,
  distro,
  fetchFromGitHub,
  jpype1,
  jre,
  lib,
  numpy,
  pandas,
  pytestCheckHook,
  setuptools,
  setuptools-scm,
}:

let
  pname = "tabula-py";
  version = "2.10.0";
in
buildPythonPackage {
  inherit pname version;
  pyproject = true;

  # Upstream tags its releases as "v<version>".
  src = fetchFromGitHub {
    owner = "chezou";
    repo = "tabula-py";
    tag = "v${version}";
    hash = "sha256-PQbwm9ho3XtpmZ7N7ASkrV8gk9Jom+yQKlt2fUa948s=";
  };

  # Point the backend at the JRE executable from the Nix store instead of the
  # bare "java" command name. --replace-fail makes the build fail if the
  # pattern is no longer present in tabula/backend.py.
  postPatch = ''
    substituteInPlace tabula/backend.py \
      --replace-fail '"java"' '"${lib.getExe jre}"'
  '';

  # PEP 517 build backend requirements.
  build-system = [
    setuptools
    setuptools-scm
  ];

  buildInputs = [ jre ];

  # Python runtime dependencies.
  dependencies = [
    distro
    numpy
    pandas
    jpype1
  ];

  nativeCheckInputs = [ pytestCheckHook ];

  disabledTests = [
    # These need network access, which is not available during the build
    "test_convert_remote_file"
    "test_read_pdf_with_remote_template"
    "test_read_remote_pdf"
    "test_read_remote_pdf_with_custom_user_agent"
    # Fails with: Failed: DID NOT RAISE <class 'RuntimeError'>
    # Unclear what this test checks; probably related to jpype, whereas the
    # subprocess code path is used here instead. See
    # https://github.com/chezou/tabula-py/issues/352#issuecomment-1730791540
    "test_read_pdf_with_silent_true"
  ];

  # Smoke test: the top-level module must be importable after installation.
  pythonImportsCheck = [ "tabula" ];

  meta = {
    description = "Module to extract table from PDF into pandas DataFrame";
    homepage = "https://github.com/chezou/tabula-py";
    changelog = "https://github.com/chezou/tabula-py/releases/tag/v${version}";
    license = lib.licenses.mit;
    maintainers = [ lib.maintainers.fab ];
  };
}