Clone of https://github.com/NixOS/nixpkgs.git (to stress-test knotserver)
at r-updates 106 lines 2.3 kB view raw
# readabilipy: Python package built from the ReadabiliPy GitHub repository,
# together with the Node.js dependencies found under readabilipy/javascript.
{
  lib,
  beautifulsoup4,
  buildPythonPackage,
  buildNpmPackage,
  fetchFromGitHub,
  html5lib,
  lxml,
  nodejs,
  pytestCheckHook,
  pythonOlder,
  regex,
  setuptools,
  testers,
  readabilipy,
}:

buildPythonPackage rec {
  pname = "readabilipy";
  version = "0.3.0";
  pyproject = true;

  disabled = pythonOlder "3.7";

  src = fetchFromGitHub {
    owner = "alan-turing-institute";
    repo = "ReadabiliPy";
    tag = "v${version}";
    hash = "sha256-FYdSbq3rm6fBHm5fDRAB0airX9fNcUGs1wHN4i6mnG0=";
  };

  # Node.js dependencies of the bundled JavaScript helper, built from the
  # readabilipy/javascript subdirectory of the same source tree.
  javascript = buildNpmPackage {
    pname = "readabilipy-javascript";
    inherit version src;

    sourceRoot = "${src.name}/readabilipy/javascript";
    npmDepsHash = "sha256-LiPSCZamkJjivzpawG7H9IEXYjn3uzFeY2vfucyHfUo=";

    # Use the lock file kept next to this expression; npmDepsHash above is
    # tied to its contents.
    postPatch = ''
      cp ${./package-lock.json} package-lock.json
    '';

    # Only the installed node_modules tree is needed; skip the npm build step.
    dontNpmBuild = true;
  };

  build-system = [ setuptools ];

  dependencies = [
    beautifulsoup4
    html5lib
    lxml
    regex
  ];

  # Link the prebuilt node_modules into the Python source tree and overwrite
  # MANIFEST.in so that everything under readabilipy/javascript is included.
  # `$javascript` relies on the `javascript` attribute above being exposed to
  # the builder as an environment variable holding its store path.
  postPatch = ''
    ln -s $javascript/lib/node_modules/ReadabiliPy/node_modules readabilipy/javascript/node_modules
    echo "recursive-include readabilipy/javascript *" >MANIFEST.in
  '';

  # Put nodejs on the PATH of the installed command-line program.
  postInstall = ''
    wrapProgram $out/bin/readabilipy \
      --prefix PATH : ${lib.makeBinPath [ nodejs ]}
  '';

  nativeCheckInputs = [
    pytestCheckHook
    nodejs
  ];

  pythonImportsCheck = [ "readabilipy" ];

  disabledTestPaths = [
    # Exclude benchmarks
    "tests/test_benchmarking.py"
  ];

  disabledTests = [
    # IndexError: list index out of range
    "test_html_blacklist"
    "test_prune_div_with_one_empty_span"
    "test_prune_div_with_one_whitespace_paragraph"
    "test_empty_page"
    "test_contentless_page"
    "test_extract_title"
    "test_iframe_containing_tags"
    "test_iframe_with_source"
  ];

  passthru = {
    # The expected string also requires the CLI to report that Readability.js
    # is supported, not just the version number.
    tests.version = testers.testVersion {
      package = readabilipy;
      command = "readabilipy --version";
      version = "${version} (Readability.js supported: yes)";
    };
  };

  meta = {
    description = "HTML content extractor";
    homepage = "https://github.com/alan-turing-institute/ReadabiliPy";
    changelog = "https://github.com/alan-turing-institute/ReadabiliPy/blob/${src.tag}/CHANGELOG.md";
    license = lib.licenses.mit;
    maintainers = with lib.maintainers; [ fab ];
    mainProgram = "readabilipy";
  };
}