# nixpkgs derivation for Scrapy (Python web-crawling framework), as of 25.11-pre
{
  lib,
  stdenv,
  botocore,
  buildPythonPackage,
  cryptography,
  cssselect,
  defusedxml,
  fetchFromGitHub,
  glibcLocales,
  installShellFiles,
  itemadapter,
  itemloaders,
  jmespath,
  lxml,
  packaging,
  parsel,
  pexpect,
  protego,
  pydispatcher,
  pyopenssl,
  pytest-xdist,
  pytestCheckHook,
  pythonOlder,
  queuelib,
  service-identity,
  setuptools,
  sybil,
  testfixtures,
  tldextract,
  twisted,
  uvloop,
  w3lib,
  zope-interface,
}:

buildPythonPackage rec {
  pname = "scrapy";
  version = "2.12.0";
  pyproject = true;

  # Scrapy 2.12 dropped support for Python 3.8 upstream; it requires >= 3.9.
  disabled = pythonOlder "3.9";

  src = fetchFromGitHub {
    owner = "scrapy";
    repo = "scrapy";
    tag = version;
    hash = "sha256-o3+57+bZRohgrld2EuoQDU2LioJu0jmaC/RPREvI1t8=";
  };

  # Upstream pins defusedxml too tightly; loosen the constraint.
  pythonRelaxDeps = [ "defusedxml" ];

  # PEP 517 build backend (pyproject = true), separate from other build tools.
  build-system = [ setuptools ];

  # installShellFiles provides installManPage/installShellCompletion for postInstall.
  nativeBuildInputs = [ installShellFiles ];

  # Runtime Python dependencies (modern replacement for propagatedBuildInputs).
  dependencies = [
    cryptography
    cssselect
    defusedxml
    itemadapter
    itemloaders
    lxml
    packaging
    parsel
    protego
    pydispatcher
    pyopenssl
    queuelib
    service-identity
    tldextract
    twisted
    w3lib
    zope-interface
  ];

  nativeCheckInputs = [
    botocore
    glibcLocales
    jmespath
    pexpect
    pytest-xdist
    pytestCheckHook
    sybil
    testfixtures
    uvloop
  ];

  # glibcLocales supplies en_US.UTF-8; some tests require a UTF-8 locale.
  env.LC_ALL = "en_US.UTF-8";

  disabledTestPaths = [
    "tests/test_proxy_connect.py"
    "tests/test_utils_display.py"
    "tests/test_command_check.py"
    # Don't test the documentation
    "docs"
  ];

  disabledTests =
    [
      # Requires network access
      "AnonymousFTPTestCase"
      "FTPFeedStorageTest"
      "FeedExportTest"
      "test_custom_asyncio_loop_enabled_true"
      "test_custom_loop_asyncio"
      "test_custom_loop_asyncio_deferred_signal"
      "FileFeedStoragePreFeedOptionsTest" # https://github.com/scrapy/scrapy/issues/5157
      "test_persist"
      "test_timeout_download_from_spider_nodata_rcvd"
      "test_timeout_download_from_spider_server_hangs"
      "test_unbounded_response"
      "CookiesMiddlewareTest"
      # Test fails on Hydra
      "test_start_requests_laziness"
    ]
    ++ lib.optionals stdenv.hostPlatform.isDarwin [
      "test_xmliter_encoding"
      "test_download"
      "test_reactor_default_twisted_reactor_select"
      "URIParamsSettingTest"
      "URIParamsFeedOptionTest"
      # flaky on darwin-aarch64
      "test_fixed_delay"
      "test_start_requests_laziness"
    ];

  # Upstream ships a man page and shell completions outside the sdist wheel.
  postInstall = ''
    installManPage extras/scrapy.1
    installShellCompletion --cmd scrapy \
      --zsh extras/scrapy_zsh_completion \
      --bash extras/scrapy_bash_completion
  '';

  pythonImportsCheck = [ "scrapy" ];

  # Several tests spin up local servers; allow localhost networking in the
  # darwin sandbox.
  __darwinAllowLocalNetworking = true;

  meta = {
    description = "High-level web crawling and web scraping framework";
    mainProgram = "scrapy";
    longDescription = ''
      Scrapy is a fast high-level web crawling and web scraping framework, used to crawl
      websites and extract structured data from their pages. It can be used for a wide
      range of purposes, from data mining to monitoring and automated testing.
    '';
    homepage = "https://scrapy.org/";
    changelog = "https://github.com/scrapy/scrapy/raw/${src.tag}/docs/news.rst";
    license = lib.licenses.bsd3;
    maintainers = with lib.maintainers; [ vinnymeller ];
  };
}