# Package definition for pdfminer.six ("PDF parser and analyzer"), built from
# the upstream GitHub repository with setuptools.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  fetchpatch2,
  cryptography,
  charset-normalizer,
  pythonOlder,
  pytestCheckHook,
  setuptools,
  replaceVars,
  ocrmypdf,
}:

buildPythonPackage rec {
  pname = "pdfminer-six";
  version = "20240706";
  pyproject = true;

  disabled = pythonOlder "3.8";

  # Upstream tags releases with the bare version string.
  src = fetchFromGitHub {
    owner = "pdfminer";
    repo = "pdfminer.six";
    tag = version;
    hash = "sha256-aY7GQADRxeiclr6/G3RRgrPcl8rGiC85JYEIjIa+vG0=";
  };

  patches = [
    # https://github.com/pdfminer/pdfminer.six/pull/1027
    (fetchpatch2 {
      name = "fix-dereference-MediaBox.patch";
      url = "https://github.com/pdfminer/pdfminer.six/pull/1027/commits/ad101c152c71431a21bfa5a8dbe33b3ba385ceec.patch?full_index=1";
      excludes = [ "CHANGELOG.md" ];
      hash = "sha256-fsSXvN92MVtNFpAst0ctvGrbxVvoe4Nyz4wMZqJ1aw8=";
    })
    # Local patch with the package version substituted in.
    # NOTE(review): judging by its file name, it replaces the git-based
    # version lookup with a fixed version string -- confirm against the patch.
    (replaceVars ./disable-setuptools-git-versioning.patch {
      inherit version;
    })
  ];

  build-system = [ setuptools ];

  dependencies = [
    charset-normalizer
    cryptography
  ];

  # Make every installed `*.py` script also available under its name without
  # the extension (as a hard link). `%.py` strips only the trailing suffix, so
  # a ".py" occurring elsewhere in the path is left untouched.
  postInstall = ''
    for file in "$out"/bin/*.py; do
      ln "$file" "''${file%.py}"
    done
  '';

  pythonImportsCheck = [
    "pdfminer"
    "pdfminer.high_level"
  ];

  nativeCheckInputs = [ pytestCheckHook ];

  disabledTests = [
    # The binary file samples/contrib/issue-1004-indirect-mediabox.pdf is
    # stripped from fix-dereference-MediaBox.patch.
    "test_contrib_issue_1004_mediabox"
  ];

  # Expose ocrmypdf as a passthru test of this package.
  passthru = {
    tests = {
      inherit ocrmypdf;
    };
  };

  meta = {
    # Link to the changelog at the release tag that `src` is pinned to.
    changelog = "https://github.com/pdfminer/pdfminer.six/blob/${src.tag}/CHANGELOG.md";
    description = "PDF parser and analyzer";
    homepage = "https://github.com/pdfminer/pdfminer.six";
    license = lib.licenses.mit;
    maintainers = with lib.maintainers; [ psyanticy ];
  };
}