Clone of https://github.com/NixOS/nixpkgs.git (to stress-test knotserver)
at gcc-offload 78 lines 1.8 kB view raw
{
  lib,
  buildPythonPackage,
  certifi,
  charset-normalizer,
  courlan,
  fetchPypi,
  htmldate,
  justext,
  lxml,
  pytestCheckHook,
  pythonOlder,
  setuptools,
  urllib3,
}:

# Package recipe for trafilatura, with the source fetched from PyPI.
buildPythonPackage rec {
  pname = "trafilatura";
  version = "1.12.2";
  pyproject = true;

  # Marked as disabled on interpreters older than Python 3.9.
  disabled = pythonOlder "3.9";

  src = fetchPypi {
    inherit pname version;
    hash = "sha256-TJyxQ09+E+8LFstE7h1E6EUj7HJolAuVWcN05+/8mpY=";
  };

  build-system = [ setuptools ];

  dependencies = [
    certifi
    charset-normalizer
    courlan
    htmldate
    justext
    lxml
    urllib3
  ];

  # Two source tweaks are applied before building:
  #   1. the GUI console-script entry is dropped from setup.py, since the GUI
  #      is not supported in this packaging;
  #   2. the CLI test suite is pointed at the trafilatura executable inside
  #      this derivation's own output instead of a bare "trafilatura" name.
  # --replace-fail is used for both substitutions, so a pattern that no longer
  # matches is reported as an error rather than silently ignored.
  postPatch = ''
    substituteInPlace setup.py \
      --replace-fail '"trafilatura_gui=trafilatura.gui:main",' ""
    substituteInPlace tests/cli_tests.py \
      --replace-fail 'trafilatura_bin = "trafilatura"' \
                     'trafilatura_bin = "${placeholder "out"}/bin/trafilatura"'
  '';

  pythonImportsCheck = [ "trafilatura" ];

  nativeCheckInputs = [ pytestCheckHook ];

  disabledTests = [
    # These tests need an internet connection, so they are skipped.
    "test_cli_pipeline"
    "test_crawl_page"
    "test_download"
    "test_feeds_helpers"
    "test_fetch"
    "test_is_live_page"
    "test_meta_redirections"
    "test_probing"
    "test_queue"
    "test_redirection"
    "test_whole"
  ];

  meta = {
    description = "Python package and command-line tool designed to gather text on the Web";
    homepage = "https://trafilatura.readthedocs.io";
    changelog = "https://github.com/adbar/trafilatura/blob/v${version}/HISTORY.md";
    license = lib.licenses.asl20;
    maintainers = [ lib.maintainers.jokatzke ];
    mainProgram = "trafilatura";
  };
}