# Scrapy: a high-level web crawling and scraping framework for Python.
# NOTE(review): dropped the unused `fetchpatch` argument — it was requested
# from the package set but never referenced in the body. Removal is safe:
# callPackage supplies arguments by attribute name.
{ lib
, stdenv
, botocore
, buildPythonPackage
, cryptography
, cssselect
, fetchPypi
, glibcLocales
, installShellFiles
, itemadapter
, itemloaders
, jmespath
, lxml
, packaging
, parsel
, protego
, pydispatcher
, pyopenssl
, pytestCheckHook
, pythonOlder
, queuelib
, service-identity
, sybil
, testfixtures
, tldextract
, twisted
, w3lib
, zope_interface
}:

buildPythonPackage rec {
  pname = "scrapy";
  version = "2.7.1";
  format = "setuptools";

  disabled = pythonOlder "3.7";

  src = fetchPypi {
    inherit version;
    # The sdist is published on PyPI under the capitalized project name,
    # which differs from the lowercase `pname` above.
    pname = "Scrapy";
    hash = "sha256-MPpAg1PSSx35ed8upK+9GbSuAvsiB/IY0kYzLx4c8U4=";
  };

  # installShellFiles provides installManPage, used in postInstall.
  nativeBuildInputs = [
    installShellFiles
  ];

  propagatedBuildInputs = [
    cryptography
    cssselect
    itemadapter
    itemloaders
    lxml
    packaging
    parsel
    protego
    pydispatcher
    pyopenssl
    queuelib
    service-identity
    tldextract
    twisted
    w3lib
    zope_interface
  ];

  # Test-only dependencies; botocore/jmespath back the S3 feed-export tests.
  checkInputs = [
    botocore
    glibcLocales
    jmespath
    pytestCheckHook
    sybil
    testfixtures
  ];

  # Several tests assume a UTF-8 locale (glibcLocales supplies it above).
  LC_ALL = "en_US.UTF-8";

  preCheck = ''
    # Disable doctest plugin because it causes pytest to hang
    substituteInPlace pytest.ini \
      --replace "--doctest-modules" ""
  '';

  disabledTestPaths = [
    "tests/test_proxy_connect.py"
    "tests/test_utils_display.py"
    "tests/test_command_check.py"
    # Don't test the documentation
    "docs"
  ];

  disabledTests = [
    # It's unclear if the failures are related to libxml2, https://github.com/NixOS/nixpkgs/pull/123890
    "test_nested_css"
    "test_nested_xpath"
    "test_flavor_detection"
    # Requires network access
    "AnonymousFTPTestCase"
    "FTPFeedStorageTest"
    "FeedExportTest"
    "test_custom_asyncio_loop_enabled_true"
    "test_custom_loop_asyncio"
    "test_custom_loop_asyncio_deferred_signal"
    "FileFeedStoragePreFeedOptionsTest" # https://github.com/scrapy/scrapy/issues/5157
    "test_timeout_download_from_spider_nodata_rcvd"
    "test_timeout_download_from_spider_server_hangs"
    # Depends on uvloop
    "test_asyncio_enabled_reactor_different_loop"
    "test_asyncio_enabled_reactor_same_loop"
    # Fails with AssertionError
    "test_peek_fifo"
    "test_peek_one_element"
    "test_peek_lifo"
    "test_callback_kwargs"
  ] ++ lib.optionals stdenv.isDarwin [
    "test_xmliter_encoding"
    "test_download"
    "test_reactor_default_twisted_reactor_select"
  ];

  # Ship the man page plus bash and zsh completions from the sdist extras.
  postInstall = ''
    installManPage extras/scrapy.1
    install -m 644 -D extras/scrapy_bash_completion $out/share/bash-completion/completions/scrapy
    install -m 644 -D extras/scrapy_zsh_completion $out/share/zsh/site-functions/_scrapy
  '';

  pythonImportsCheck = [
    "scrapy"
  ];

  # Some tests bind to localhost; allow that inside the darwin sandbox.
  __darwinAllowLocalNetworking = true;

  meta = with lib; {
    description = "High-level web crawling and web scraping framework";
    longDescription = ''
      Scrapy is a fast high-level web crawling and web scraping framework, used to crawl
      websites and extract structured data from their pages. It can be used for a wide
      range of purposes, from data mining to monitoring and automated testing.
    '';
    homepage = "https://scrapy.org/";
    changelog = "https://github.com/scrapy/scrapy/raw/${version}/docs/news.rst";
    license = licenses.bsd3;
    maintainers = with maintainers; [ marsam ];
    platforms = platforms.unix;
  };
}