# nixpkgs 23.11-beta: python3Packages.scrapy (source copied from raw view, 3.9 kB)
# Scrapy: a Python web-crawling/scraping framework, packaged with
# buildPythonPackage from PyPI sources.
{ lib
, stdenv
, botocore
, buildPythonPackage
, cryptography
, cssselect
, fetchPypi
, fetchpatch
, glibcLocales
, installShellFiles
, itemadapter
, itemloaders
, jmespath
, lxml
, packaging
, parsel
, pexpect
, protego
, pydispatcher
, pyopenssl
, pytestCheckHook
, pythonOlder
, queuelib
, service-identity
, sybil
, testfixtures
, tldextract
, twisted
, w3lib
, zope_interface
}:

buildPythonPackage rec {
  pname = "scrapy";
  version = "2.11.0";
  format = "setuptools";

  # Upstream supports Python >= 3.8 only.
  disabled = pythonOlder "3.8";

  src = fetchPypi {
    inherit version;
    # PyPI sdist name is capitalized, unlike the nixpkgs attribute name.
    pname = "Scrapy";
    hash = "sha256-PL3tzgw/DgSC1hvi10WGg758188UsO5q37rduA9bNqU=";
  };

  patches = [
    # Fix compatibility with Twisted>=23.8. Remove with the next release.
    (fetchpatch {
      url = "https://github.com/scrapy/scrapy/commit/aa95ada42cdf570f840f55c463375f8a81b303f8.patch";
      hash = "sha256-LuhA5BqtjSUgkotplvUCtvGNYOTrl0MJRCXiSBMDFzY=";
      # These test files are excluded; presumably they do not apply cleanly
      # to this release — re-check when the patch is dropped.
      excludes = [
        "tests/CrawlerProcess/sleeping.py"
        "tests/test_crawler.py"
      ];
    })
  ];

  # Needed for installManPage/installShellCompletion in postInstall.
  nativeBuildInputs = [
    installShellFiles
  ];

  propagatedBuildInputs = [
    cryptography
    cssselect
    itemadapter
    itemloaders
    lxml
    packaging
    parsel
    protego
    pydispatcher
    pyopenssl
    queuelib
    service-identity
    tldextract
    twisted
    w3lib
    zope_interface
  ];

  nativeCheckInputs = [
    botocore
    glibcLocales
    jmespath
    pexpect
    pytestCheckHook
    sybil
    testfixtures
  ];

  # The test suite needs a UTF-8 locale (glibcLocales provides it).
  LC_ALL = "en_US.UTF-8";

  disabledTestPaths = [
    "tests/test_proxy_connect.py"
    "tests/test_utils_display.py"
    "tests/test_command_check.py"
    # Don't test the documentation
    "docs"
  ];

  disabledTests = [
    # It's unclear if the failures are related to libxml2, https://github.com/NixOS/nixpkgs/pull/123890
    "test_nested_css"
    "test_nested_xpath"
    "test_flavor_detection"
    "test_follow_whitespace"
    # Requires network access
    "AnonymousFTPTestCase"
    "FTPFeedStorageTest"
    "FeedExportTest"
    "test_custom_asyncio_loop_enabled_true"
    "test_custom_loop_asyncio"
    "test_custom_loop_asyncio_deferred_signal"
    "FileFeedStoragePreFeedOptionsTest" # https://github.com/scrapy/scrapy/issues/5157
    "test_persist"
    "test_timeout_download_from_spider_nodata_rcvd"
    "test_timeout_download_from_spider_server_hangs"
    "test_unbounded_response"
    "CookiesMiddlewareTest"
    # Depends on uvloop
    "test_asyncio_enabled_reactor_different_loop"
    "test_asyncio_enabled_reactor_same_loop"
    # Fails with AssertionError
    "test_peek_fifo"
    "test_peek_one_element"
    "test_peek_lifo"
    "test_callback_kwargs"
    # Test fails on Hydra
    "test_start_requests_laziness"
  ] ++ lib.optionals stdenv.isDarwin [
    "test_xmliter_encoding"
    "test_download"
    "test_reactor_default_twisted_reactor_select"
    "URIParamsSettingTest"
    "URIParamsFeedOptionTest"
    # flaky on darwin-aarch64
    "test_fixed_delay"
    "test_start_requests_laziness"
  ];

  # Install the man page and shell completions shipped in extras/.
  postInstall = ''
    installManPage extras/scrapy.1
    installShellCompletion --cmd scrapy \
      --zsh extras/scrapy_zsh_completion \
      --bash extras/scrapy_bash_completion
  '';

  pythonImportsCheck = [
    "scrapy"
  ];

  # Some tests bind to localhost; allow that inside the darwin sandbox.
  __darwinAllowLocalNetworking = true;

  meta = with lib; {
    description = "High-level web crawling and web scraping framework";
    longDescription = ''
      Scrapy is a fast high-level web crawling and web scraping framework, used to crawl
      websites and extract structured data from their pages. It can be used for a wide
      range of purposes, from data mining to monitoring and automated testing.
    '';
    homepage = "https://scrapy.org/";
    changelog = "https://github.com/scrapy/scrapy/raw/${version}/docs/news.rst";
    license = licenses.bsd3;
    maintainers = with maintainers; [ marsam ];
  };
}