{ lib
, stdenv
, botocore
, buildPythonPackage
, cryptography
, cssselect
, fetchPypi
, fetchpatch
, glibcLocales
, installShellFiles
, itemadapter
, itemloaders
, jmespath
, lxml
, packaging
, parsel
, protego
, pydispatcher
, pyopenssl
, pytestCheckHook
, pythonOlder
, queuelib
, service-identity
, sybil
, testfixtures
, tldextract
, twisted
, w3lib
, zope_interface
}:

buildPythonPackage rec {
  pname = "scrapy";
  version = "2.11.0";
  format = "setuptools";

  disabled = pythonOlder "3.8";

  src = fetchPypi {
    inherit version;
    pname = "Scrapy";
    hash = "sha256-PL3tzgw/DgSC1hvi10WGg758188UsO5q37rduA9bNqU=";
  };

  nativeBuildInputs = [
    installShellFiles
  ];

  propagatedBuildInputs = [
    cryptography
    cssselect
    itemadapter
    itemloaders
    lxml
    packaging
    parsel
    protego
    pydispatcher
    pyopenssl
    queuelib
    service-identity
    tldextract
    twisted
    w3lib
    zope_interface
  ];

  nativeCheckInputs = [
    botocore
    glibcLocales
    jmespath
    pytestCheckHook
    sybil
    testfixtures
  ];

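  # The test suite expects a UTF-8 locale; glibcLocales provides en_US.UTF-8.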
  LC_ALL = "en_US.UTF-8";

  disabledTestPaths = [
    "tests/test_proxy_connect.py"
    "tests/test_utils_display.py"
    "tests/test_command_check.py"
    # Don't test the documentation
    "docs"
  ];

  disabledTests = [
    # It's unclear if the failures are related to libxml2, https://github.com/NixOS/nixpkgs/pull/123890
    "test_nested_css"
    "test_nested_xpath"
    "test_flavor_detection"
    "test_follow_whitespace"
    # Requires network access
    "AnonymousFTPTestCase"
    "FTPFeedStorageTest"
    "FeedExportTest"
    "test_custom_asyncio_loop_enabled_true"
    "test_custom_loop_asyncio"
    "test_custom_loop_asyncio_deferred_signal"
    "FileFeedStoragePreFeedOptionsTest" # https://github.com/scrapy/scrapy/issues/5157
    "test_persist"
    "test_timeout_download_from_spider_nodata_rcvd"
    "test_timeout_download_from_spider_server_hangs"
    "test_unbounded_response"
    "CookiesMiddlewareTest"
    # Depends on uvloop
    "test_asyncio_enabled_reactor_different_loop"
    "test_asyncio_enabled_reactor_same_loop"
    # Fails with AssertionError
    "test_peek_fifo"
    "test_peek_one_element"
    "test_peek_lifo"
    "test_callback_kwargs"
    # Test fails on Hydra
    "test_start_requests_laziness"
  ] ++ lib.optionals stdenv.isDarwin [
    "test_xmliter_encoding"
    "test_download"
    "test_reactor_default_twisted_reactor_select"
    "URIParamsSettingTest"
    "URIParamsFeedOptionTest"
    # Flaky on darwin-aarch64
    "test_fixed_delay"
    "test_start_requests_laziness"
  ];

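  # Install the man page and shell completions shipped in extras/.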
  postInstall = ''
    installManPage extras/scrapy.1
    installShellCompletion --cmd scrapy \
      --zsh extras/scrapy_zsh_completion \
      --bash extras/scrapy_bash_completion
  '';

  pythonImportsCheck = [
    "scrapy"
  ];

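  # Some tests bind to local sockets, which the Darwin sandbox blocks unless this is set.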
  __darwinAllowLocalNetworking = true;

  meta = with lib; {
    description = "High-level web crawling and web scraping framework";
    longDescription = ''
      Scrapy is a fast, high-level web crawling and web scraping framework used to
      crawl websites and extract structured data from their pages. It can be used for
      a wide range of purposes, from data mining to monitoring and automated testing.
    '';
    homepage = "https://scrapy.org/";
    changelog = "https://github.com/scrapy/scrapy/raw/${version}/docs/news.rst";
    license = licenses.bsd3;
    maintainers = with maintainers; [ marsam ];
  };
}