# Package expression for ReadabiliPy, a Python HTML content extractor that
# ships a `readabilipy` command-line program and can optionally call a
# Node.js helper located under readabilipy/javascript.
{
  lib,
  beautifulsoup4,
  buildPythonPackage,
  buildNpmPackage,
  fetchFromGitHub,
  html5lib,
  lxml,
  nodejs,
  pytestCheckHook,
  pythonOlder,
  regex,
  setuptools,
  testers,
  readabilipy,
}:

buildPythonPackage rec {
  pname = "readabilipy";
  version = "0.3.0";
  pyproject = true;

  disabled = pythonOlder "3.7";

  src = fetchFromGitHub {
    owner = "alan-turing-institute";
    repo = "ReadabiliPy";
    tag = "v${version}";
    hash = "sha256-FYdSbq3rm6fBHm5fDRAB0airX9fNcUGs1wHN4i6mnG0=";
  };

  # npm dependencies of the JavaScript helper, built as a separate derivation
  # from the readabilipy/javascript subdirectory of the same source tree.
  # Because this is a plain derivation attribute, its store path is available
  # to the build phases below as `$javascript`.
  javascript = buildNpmPackage {
    pname = "readabilipy-javascript";
    inherit version src;

    sourceRoot = "${src.name}/readabilipy/javascript";
    npmDepsHash = "sha256-LiPSCZamkJjivzpawG7H9IEXYjn3uzFeY2vfucyHfUo=";

    # Use the lock file kept next to this expression; `npmDepsHash` is
    # computed against it.
    # NOTE(review): presumably upstream does not ship a usable lock file — confirm.
    postPatch = ''
      cp ${./package-lock.json} package-lock.json
    '';

    # Only the installed node_modules tree is needed; there is no build step.
    dontNpmBuild = true;
  };

  build-system = [ setuptools ];

  dependencies = [
    beautifulsoup4
    html5lib
    lxml
    regex
  ];

  # Link the pre-fetched node_modules into the source tree and replace
  # MANIFEST.in so that everything under readabilipy/javascript is included
  # in the built distribution.
  postPatch = ''
    ln -s $javascript/lib/node_modules/ReadabiliPy/node_modules readabilipy/javascript/node_modules
    echo "recursive-include readabilipy/javascript *" >MANIFEST.in
  '';

  # Make `node` discoverable on PATH when the CLI is run.
  postInstall = ''
    wrapProgram $out/bin/readabilipy \
      --prefix PATH : ${lib.makeBinPath [ nodejs ]}
  '';

  nativeCheckInputs = [
    pytestCheckHook
    # The test suite is run with node available on PATH.
    nodejs
  ];

  pythonImportsCheck = [ "readabilipy" ];

  disabledTestPaths = [
    # Exclude benchmarks
    "tests/test_benchmarking.py"
  ];

  passthru = {
    # Checks the reported version and that the wrapped CLI reports
    # Readability.js as supported.
    tests.version = testers.testVersion {
      package = readabilipy;
      command = "readabilipy --version";
      version = "${version} (Readability.js supported: yes)";
    };
  };

  meta = {
    description = "HTML content extractor";
    mainProgram = "readabilipy";
    homepage = "https://github.com/alan-turing-institute/ReadabiliPy";
    changelog = "https://github.com/alan-turing-institute/ReadabiliPy/blob/${src.tag}/CHANGELOG.md";
    license = lib.licenses.mit;
    maintainers = with lib.maintainers; [ fab ];
  };
}