# Package expression for pdfminer.six, built from the upstream GitHub
# repository at the tag matching `version`.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  cryptography,
  charset-normalizer,
  pytestCheckHook,
  setuptools,
  setuptools-scm,
  ocrmypdf,
}:

buildPythonPackage rec {
  pname = "pdfminer-six";
  version = "20250506";
  pyproject = true;

  # Upstream tags are the bare version string, so the tag is `version` itself.
  src = fetchFromGitHub {
    owner = "pdfminer";
    repo = "pdfminer.six";
    tag = version;
    hash = "sha256-BE/HMl/e1KnkSc2jXU5Du5FMF+rCBv5AJ7a88oFrBgM=";
  };

  build-system = [
    setuptools
    setuptools-scm
  ];

  dependencies = [
    charset-normalizer
    cryptography
  ];

  # Drop the `.py` suffix from every script installed into $out/bin so the
  # executables are exposed under their extension-less names.
  postInstall = ''
    for file in "$out/bin/"*.py; do
      mv "$file" "''${file%.py}"
    done
  '';

  pythonImportsCheck = [
    "pdfminer"
    "pdfminer.high_level"
  ];

  nativeCheckInputs = [ pytestCheckHook ];

  disabledTests = [
    # The binary file samples/contrib/issue-1004-indirect-mediabox.pdf is
    # stripped from fix-dereference-MediaBox.patch.
    # NOTE(review): no `patches` attribute is present in this expression, so
    # this explanation may be stale; confirm whether the test still needs to
    # be disabled for this version before removing it.
    "test_contrib_issue_1004_mediabox"
  ];

  # Expose a reverse dependency so changes here can be checked against it.
  passthru = {
    tests = {
      inherit ocrmypdf;
    };
  };

  meta = {
    # Use `src.tag` (the plain tag name) rather than `src.rev`: for a
    # tag-based fetch the latter is the fully-qualified `refs/tags/…` ref.
    changelog = "https://github.com/pdfminer/pdfminer.six/blob/${src.tag}/CHANGELOG.md";
    description = "PDF parser and analyzer";
    homepage = "https://github.com/pdfminer/pdfminer.six";
    license = lib.licenses.mit;
    maintainers = with lib.maintainers; [ psyanticy ];
  };
}