# Package definition for Scrapy, built from the "Scrapy" sdist on PyPI with
# buildPythonPackage. Every argument below is expected to be supplied by the
# enclosing Python package set (callPackage-style).
{ lib
, stdenv
, botocore
, buildPythonPackage
, cryptography
, cssselect
, fetchPypi
, fetchpatch
, glibcLocales
, installShellFiles
, itemadapter
, itemloaders
, jmespath
, lxml
, parsel
, protego
, pydispatcher
, pyopenssl
, pytestCheckHook
, pythonOlder
, queuelib
, service-identity
, sybil
, testfixtures
, twisted
, w3lib
, zope_interface
}:

buildPythonPackage rec {
  pname = "scrapy";
  version = "2.5.1";
  # Not built for Python interpreters older than 3.6.
  disabled = pythonOlder "3.6";

  src = fetchPypi {
    inherit version;
    # The PyPI project name is capitalised, unlike the attribute's pname.
    pname = "Scrapy";
    sha256 = "13af6032476ab4256158220e530411290b3b934dd602bb6dacacbf6d16141f49";
  };

  # Provides installManPage and installShellCompletion used in postInstall.
  nativeBuildInputs = [
    installShellFiles
  ];

  propagatedBuildInputs = [
    cryptography
    cssselect
    itemadapter
    itemloaders
    lxml
    parsel
    protego
    pydispatcher
    pyopenssl
    queuelib
    service-identity
    twisted
    w3lib
    zope_interface
  ];

  checkInputs = [
    botocore
    glibcLocales
    jmespath
    pytestCheckHook
    sybil
    testfixtures
  ];

  patches = [
    # Require setuptools, https://github.com/scrapy/scrapy/pull/5122
    (fetchpatch {
      name = "add-setuptools.patch";
      url = "https://github.com/scrapy/scrapy/commit/4f500342c8ad4674b191e1fab0d1b2ac944d7d3e.patch";
      sha256 = "14030sfv1cf7dy4yww02b49mg39cfcg4bv7ys1iwycfqag3xcjda";
    })
    # Make Twisted[http2] installation optional, https://github.com/scrapy/scrapy/pull/5113
    (fetchpatch {
      name = "remove-h2.patch";
      url = "https://github.com/scrapy/scrapy/commit/c5b1ee810167266fcd259f263dbfc0fe0204761a.patch";
      sha256 = "0sa39yx9my4nqww8a12bk9zagx7b56vwy7xpxm4xgjapjl6mcc0k";
      # The tox.ini hunk of the upstream commit is not applied.
      excludes = [ "tox.ini" ];
    })
  ];

  # Exported into the build environment; presumably the test suite needs a
  # UTF-8 locale, with glibcLocales (see checkInputs) supplying the locale data.
  LC_ALL = "en_US.UTF-8";

  # Disable doctest plugin because it causes pytest to hang
  preCheck = ''
    substituteInPlace pytest.ini --replace "--doctest-modules" ""
  '';

  disabledTestPaths = [
    "tests/test_proxy_connect.py"
    "tests/test_utils_display.py"
    "tests/test_command_check.py"
    # Don't test the documentation
    "docs"
  ];

  disabledTests = [
    # It's unclear if the failures are related to libxml2, https://github.com/NixOS/nixpkgs/pull/123890
    "test_nested_css"
    "test_nested_xpath"
    "test_flavor_detection"
    # Requires network access
    "FTPFeedStorageTest"
    "FeedExportTest"
    "test_custom_asyncio_loop_enabled_true"
    "test_custom_loop_asyncio"
    "test_custom_loop_asyncio_deferred_signal"
    "FileFeedStoragePreFeedOptionsTest" # https://github.com/scrapy/scrapy/issues/5157
    # Fails with AssertionError
    "test_peek_fifo"
    "test_peek_one_element"
    "test_peek_lifo"
  ] ++ lib.optionals stdenv.isDarwin [
    # Skipped only on Darwin
    "test_xmliter_encoding"
    "test_download"
  ];

  # Install the man page and shell completions shipped in extras/ through the
  # installShellFiles hook. --name keeps the installed file names identical to
  # the previous hand-written `install` calls (completions/scrapy and
  # site-functions/_scrapy).
  postInstall = ''
    installManPage extras/scrapy.1
    installShellCompletion \
      --bash --name scrapy extras/scrapy_bash_completion \
      --zsh --name _scrapy extras/scrapy_zsh_completion
  '';

  pythonImportsCheck = [ "scrapy" ];

  # Allow local (loopback) networking in the Darwin build sandbox; presumably
  # required by tests that talk to servers started on localhost.
  __darwinAllowLocalNetworking = true;

  meta = with lib; {
    description = "High-level web crawling and web scraping framework";
    longDescription = ''
      Scrapy is a fast high-level web crawling and web scraping framework, used to crawl
      websites and extract structured data from their pages. It can be used for a wide
      range of purposes, from data mining to monitoring and automated testing.
    '';
    homepage = "https://scrapy.org/";
    changelog = "https://github.com/scrapy/scrapy/raw/${version}/docs/news.rst";
    license = licenses.bsd3;
    maintainers = with maintainers; [ drewkett marsam ];
    platforms = platforms.unix;
  };
}