# Clone of https://github.com/NixOS/nixpkgs.git (to stress-test knotserver)
{ lib
, stdenv
, botocore
, buildPythonPackage
, cryptography
, cssselect
, fetchPypi
, fetchpatch
, glibcLocales
, installShellFiles
, itemadapter
, itemloaders
, jmespath
, lxml
, packaging
, parsel
, protego
, pydispatcher
, pyopenssl
, pytestCheckHook
, pythonOlder
, queuelib
, service-identity
, sybil
, testfixtures
, tldextract
, twisted
, w3lib
, zope_interface
}:

buildPythonPackage rec {
  pname = "scrapy";
  version = "2.7.1";
  format = "setuptools";

  disabled = pythonOlder "3.7";

  src = fetchPypi {
    inherit version;
    # PyPI sdist uses the capitalized project name.
    pname = "Scrapy";
    hash = "sha256-MPpAg1PSSx35ed8upK+9GbSuAvsiB/IY0kYzLx4c8U4=";
  };

  nativeBuildInputs = [
    installShellFiles
  ];

  propagatedBuildInputs = [
    cryptography
    cssselect
    itemadapter
    itemloaders
    lxml
    packaging
    parsel
    protego
    pydispatcher
    pyopenssl
    queuelib
    service-identity
    tldextract
    twisted
    w3lib
    zope_interface
  ];

  checkInputs = [
    botocore
    glibcLocales
    jmespath
    pytestCheckHook
    sybil
    testfixtures
  ];

  # Some tests depend on a UTF-8 locale being available.
  LC_ALL = "en_US.UTF-8";

  preCheck = ''
    # Disable doctest plugin because it causes pytest to hang
    substituteInPlace pytest.ini \
      --replace "--doctest-modules" ""
  '';

  disabledTestPaths = [
    "tests/test_proxy_connect.py"
    "tests/test_utils_display.py"
    "tests/test_command_check.py"
    # Don't test the documentation
    "docs"
  ];

  disabledTests = [
    # It's unclear if the failures are related to libxml2, https://github.com/NixOS/nixpkgs/pull/123890
    "test_nested_css"
    "test_nested_xpath"
    "test_flavor_detection"
    "test_follow_whitespace"
    # Requires network access
    "AnonymousFTPTestCase"
    "FTPFeedStorageTest"
    "FeedExportTest"
    "test_custom_asyncio_loop_enabled_true"
    "test_custom_loop_asyncio"
    "test_custom_loop_asyncio_deferred_signal"
    "FileFeedStoragePreFeedOptionsTest" # https://github.com/scrapy/scrapy/issues/5157
    "test_timeout_download_from_spider_nodata_rcvd"
    "test_timeout_download_from_spider_server_hangs"
    # Depends on uvloop
    "test_asyncio_enabled_reactor_different_loop"
    "test_asyncio_enabled_reactor_same_loop"
    # Fails with AssertionError
    "test_peek_fifo"
    "test_peek_one_element"
    "test_peek_lifo"
    "test_callback_kwargs"
  ] ++ lib.optionals stdenv.isDarwin [
    "test_xmliter_encoding"
    "test_download"
    "test_reactor_default_twisted_reactor_select"
  ];

  # Install the man page and shell completions shipped in extras/.
  postInstall = ''
    installManPage extras/scrapy.1
    install -m 644 -D extras/scrapy_bash_completion $out/share/bash-completion/completions/scrapy
    install -m 644 -D extras/scrapy_zsh_completion $out/share/zsh/site-functions/_scrapy
  '';

  pythonImportsCheck = [
    "scrapy"
  ];

  # Some tests bind to localhost; allow that inside the darwin sandbox.
  __darwinAllowLocalNetworking = true;

  meta = with lib; {
    description = "High-level web crawling and web scraping framework";
    longDescription = ''
      Scrapy is a fast high-level web crawling and web scraping framework, used to crawl
      websites and extract structured data from their pages. It can be used for a wide
      range of purposes, from data mining to monitoring and automated testing.
    '';
    homepage = "https://scrapy.org/";
    changelog = "https://github.com/scrapy/scrapy/raw/${version}/docs/news.rst";
    license = licenses.bsd3;
    maintainers = with maintainers; [ marsam ];
    platforms = platforms.unix;
  };
}