# Package definition for ReadabiliPy, built from the upstream GitHub
# repository with buildPythonPackage.
{
  lib,
  beautifulsoup4,
  buildPythonPackage,
  fetchFromGitHub,
  html5lib,
  lxml,
  pytestCheckHook,
  pythonOlder,
  regex,
  setuptools,
}:

buildPythonPackage rec {
  pname = "readabilipy";
  version = "0.2.0";
  pyproject = true;

  # Not built for Python interpreters older than 3.7.
  disabled = pythonOlder "3.7";

  # Upstream release tags carry a "v" prefix (e.g. v0.2.0).
  src = fetchFromGitHub {
    owner = "alan-turing-institute";
    repo = "ReadabiliPy";
    rev = "refs/tags/v${version}";
    hash = "sha256-XrmdQjLFYdadWeO5DoKAQeEdta+6T6BqfvGlDkzLMyM=";
  };

  # Build backend required by the pyproject build.
  nativeBuildInputs = [ setuptools ];

  # Runtime dependencies.
  propagatedBuildInputs = [
    beautifulsoup4
    html5lib
    lxml
    regex
  ];

  nativeCheckInputs = [ pytestCheckHook ];

  # Verify that the top-level module can be imported after installation.
  pythonImportsCheck = [ "readabilipy" ];

  disabledTests = [
    # These fail with AssertionError in the build environment.
    # NOTE(review): the *_with_readability_js tests presumably need the
    # Node.js Readability.js backend, which is not provided here - confirm.
    "test_extract_simple_article_with_readability_js"
    "test_extract_article_from_page_with_readability_js"
    "test_plain_element_with_comments"
    "test_content_digest_on_filled_and_empty_elements"
  ];

  disabledTestPaths = [
    # Exclude benchmarks
    "tests/test_benchmarking.py"
  ];

  meta = {
    description = "HTML content extractor";
    mainProgram = "readabilipy";
    homepage = "https://github.com/alan-turing-institute/ReadabiliPy";
    # Must use the same "v"-prefixed tag as `src.rev`; a bare version number
    # does not name an upstream ref.
    changelog = "https://github.com/alan-turing-institute/ReadabiliPy/blob/v${version}/CHANGELOG.md";
    license = lib.licenses.mit;
    maintainers = [ lib.maintainers.fab ];
  };
}