{
  lib,
  stdenv,
  botocore,
  buildPythonPackage,
  cryptography,
  cssselect,
  defusedxml,
  fetchFromGitHub,
  glibcLocales,
  installShellFiles,
  itemadapter,
  itemloaders,
  jmespath,
  lxml,
  packaging,
  parsel,
  pexpect,
  protego,
  pydispatcher,
  pyopenssl,
  pytest-xdist,
  pytestCheckHook,
  pythonOlder,
  queuelib,
  service-identity,
  setuptools,
  sybil,
  testfixtures,
  tldextract,
  twisted,
  uvloop,
  w3lib,
  zope-interface,
}:

# Scrapy: a high-level web crawling and web scraping framework.
buildPythonPackage rec {
  pname = "scrapy";
  version = "2.11.2";
  pyproject = true;

  disabled = pythonOlder "3.8";

  src = fetchFromGitHub {
    owner = "scrapy";
    repo = "scrapy";
    rev = "refs/tags/${version}";
    hash = "sha256-EaO1kQ3VSTwEW+r0kSKycOxHNTPwwCVjch1ZBrTU0qQ=";
  };

  # Upstream pins defusedxml more tightly than the version packaged here.
  pythonRelaxDeps = [ "defusedxml" ];

  # PEP 517 build backend lives in build-system, not nativeBuildInputs.
  build-system = [ setuptools ];

  nativeBuildInputs = [ installShellFiles ];

  # Runtime Python dependencies (modern replacement for propagatedBuildInputs).
  dependencies = [
    cryptography
    cssselect
    defusedxml
    itemadapter
    itemloaders
    lxml
    packaging
    parsel
    protego
    pydispatcher
    pyopenssl
    queuelib
    service-identity
    tldextract
    twisted
    w3lib
    zope-interface
  ];

  nativeCheckInputs = [
    botocore
    glibcLocales
    jmespath
    pexpect
    pytest-xdist
    pytestCheckHook
    sybil
    testfixtures
    uvloop
  ];

  # The test suite needs a UTF-8 locale (glibcLocales provides it above).
  env.LC_ALL = "en_US.UTF-8";

  disabledTestPaths = [
    "tests/test_proxy_connect.py"
    "tests/test_utils_display.py"
    "tests/test_command_check.py"
    # Don't test the documentation
    "docs"
  ];

  disabledTests =
    [
      # Requires network access
      "AnonymousFTPTestCase"
      "FTPFeedStorageTest"
      "FeedExportTest"
      "test_custom_asyncio_loop_enabled_true"
      "test_custom_loop_asyncio"
      "test_custom_loop_asyncio_deferred_signal"
      "FileFeedStoragePreFeedOptionsTest" # https://github.com/scrapy/scrapy/issues/5157
      "test_persist"
      "test_timeout_download_from_spider_nodata_rcvd"
      "test_timeout_download_from_spider_server_hangs"
      "test_unbounded_response"
      "CookiesMiddlewareTest"
      # Test fails on Hydra
      "test_start_requests_laziness"
    ]
    ++ lib.optionals stdenv.hostPlatform.isDarwin [
      "test_xmliter_encoding"
      "test_download"
      "test_reactor_default_twisted_reactor_select"
      "URIParamsSettingTest"
      "URIParamsFeedOptionTest"
      # flaky on darwin-aarch64
      "test_fixed_delay"
      "test_start_requests_laziness"
    ];

  # Install the man page and shell completions shipped in extras/.
  postInstall = ''
    installManPage extras/scrapy.1
    installShellCompletion --cmd scrapy \
      --zsh extras/scrapy_zsh_completion \
      --bash extras/scrapy_bash_completion
  '';

  pythonImportsCheck = [ "scrapy" ];

  # Several tests bind to localhost; allow that inside the darwin sandbox.
  __darwinAllowLocalNetworking = true;

  meta = {
    description = "High-level web crawling and web scraping framework";
    mainProgram = "scrapy";
    longDescription = ''
      Scrapy is a fast high-level web crawling and web scraping framework, used to crawl
      websites and extract structured data from their pages. It can be used for a wide
      range of purposes, from data mining to monitoring and automated testing.
    '';
    homepage = "https://scrapy.org/";
    changelog = "https://github.com/scrapy/scrapy/raw/${version}/docs/news.rst";
    license = lib.licenses.bsd3;
    maintainers = with lib.maintainers; [ vinnymeller ];
  };
}