# nixpkgs derivation for scrapy (recovered from a "view raw" page scrape)
{
  lib,
  stdenv,
  botocore,
  buildPythonPackage,
  cryptography,
  cssselect,
  fetchPypi,
  fetchpatch,
  glibcLocales,
  installShellFiles,
  itemadapter,
  itemloaders,
  jmespath,
  lxml,
  packaging,
  parsel,
  pexpect,
  protego,
  pydispatcher,
  pyopenssl,
  pytestCheckHook,
  pythonOlder,
  queuelib,
  service-identity,
  setuptools,
  sybil,
  testfixtures,
  tldextract,
  twisted,
  w3lib,
  zope-interface,
}:

buildPythonPackage rec {
  pname = "scrapy";
  version = "2.11.1";
  # Build via PEP 517 (pyproject.toml) rather than legacy setup.py.
  pyproject = true;

  disabled = pythonOlder "3.8";

  # PyPI publishes the sdist under the capitalized name "Scrapy".
  src = fetchPypi {
    inherit version;
    pname = "Scrapy";
    hash = "sha256-czoDnHQj5StpvygQtTMgk9TkKoSEYDWcB7Auz/j3Pr4=";
  };

  patches = [
    # https://github.com/scrapy/scrapy/pull/6316
    # fix test_get_func_args. remove on next update
    (fetchpatch {
      name = "test_get_func_args.patch";
      url = "https://github.com/scrapy/scrapy/commit/b1fe97dc6c8509d58b29c61cf7801eeee1b409a9.patch";
      hash = "sha256-POlmsuW4SD9baKwZieKfmlp2vtdlb7aKQ62VOmNXsr0=";
    })
  ];

  # installShellFiles provides installManPage/installShellCompletion used
  # in postInstall; setuptools is the PEP 517 build backend.
  nativeBuildInputs = [
    installShellFiles
    setuptools
  ];

  # Runtime Python dependencies of scrapy.
  propagatedBuildInputs = [
    cryptography
    cssselect
    itemadapter
    itemloaders
    lxml
    packaging
    parsel
    protego
    pydispatcher
    pyopenssl
    queuelib
    service-identity
    tldextract
    twisted
    w3lib
    zope-interface
  ];

  # Extra packages needed only by the test suite.
  nativeCheckInputs = [
    botocore
    glibcLocales
    jmespath
    pexpect
    pytestCheckHook
    sybil
    testfixtures
  ];

  # Several tests require a UTF-8 locale.
  LC_ALL = "en_US.UTF-8";

  disabledTestPaths = [
    "tests/test_proxy_connect.py"
    "tests/test_utils_display.py"
    "tests/test_command_check.py"
    # Don't test the documentation
    "docs"
  ];

  disabledTests =
    [
      # It's unclear if the failures are related to libxml2, https://github.com/NixOS/nixpkgs/pull/123890
      "test_nested_css"
      "test_nested_xpath"
      "test_flavor_detection"
      "test_follow_whitespace"
      # Requires network access
      "AnonymousFTPTestCase"
      "FTPFeedStorageTest"
      "FeedExportTest"
      "test_custom_asyncio_loop_enabled_true"
      "test_custom_loop_asyncio"
      "test_custom_loop_asyncio_deferred_signal"
      "FileFeedStoragePreFeedOptionsTest" # https://github.com/scrapy/scrapy/issues/5157
      "test_persist"
      "test_timeout_download_from_spider_nodata_rcvd"
      "test_timeout_download_from_spider_server_hangs"
      "test_unbounded_response"
      "CookiesMiddlewareTest"
      # Depends on uvloop
      "test_asyncio_enabled_reactor_different_loop"
      "test_asyncio_enabled_reactor_same_loop"
      # Fails with AssertionError
      "test_peek_fifo"
      "test_peek_one_element"
      "test_peek_lifo"
      "test_callback_kwargs"
      # Test fails on Hydra
      "test_start_requests_laziness"
    ]
    ++ lib.optionals stdenv.isDarwin [
      "test_xmliter_encoding"
      "test_download"
      "test_reactor_default_twisted_reactor_select"
      "URIParamsSettingTest"
      "URIParamsFeedOptionTest"
      # flaky on darwin-aarch64
      "test_fixed_delay"
      "test_start_requests_laziness"
    ];

  # Ship the man page and the shell completions bundled in extras/.
  postInstall = ''
    installManPage extras/scrapy.1
    installShellCompletion --cmd scrapy \
      --zsh extras/scrapy_zsh_completion \
      --bash extras/scrapy_bash_completion
  '';

  pythonImportsCheck = [ "scrapy" ];

  # The test suite binds local sockets on darwin.
  __darwinAllowLocalNetworking = true;

  meta = with lib; {
    description = "High-level web crawling and web scraping framework";
    mainProgram = "scrapy";
    longDescription = ''
      Scrapy is a fast high-level web crawling and web scraping framework, used to crawl
      websites and extract structured data from their pages. It can be used for a wide
      range of purposes, from data mining to monitoring and automated testing.
    '';
    homepage = "https://scrapy.org/";
    changelog = "https://github.com/scrapy/scrapy/raw/${version}/docs/news.rst";
    license = licenses.bsd3;
    maintainers = with maintainers; [ vinnymeller ];
  };
}