nixpkgs mirror (for testing)
github.com/NixOS/nixpkgs
nix
# Package expression for Scrapy 2.6.1, a Python web crawling/scraping framework.
#
# The expression is a function over its dependencies and evaluates to the
# result of `buildPythonPackage`. The source is fetched from PyPI, the test
# suite is run through `pytestCheckHook` with a number of tests disabled, and
# the man page and shell completions shipped in `extras/` are installed.
{ lib
, stdenv
, botocore
, buildPythonPackage
, cryptography
, cssselect
, fetchPypi
, fetchpatch # not referenced below; kept so the argument set stays unchanged
, glibcLocales
, installShellFiles
, itemadapter
, itemloaders
, jmespath
, lxml
, parsel
, protego
, pydispatcher
, pyopenssl
, pytestCheckHook
, pythonOlder
, queuelib
, service-identity
, sybil
, testfixtures
, tldextract
, twisted
, w3lib
, zope_interface
}:

buildPythonPackage rec {
  pname = "scrapy";
  version = "2.6.1";
  format = "setuptools";

  # Not built for Python interpreters older than 3.6.
  disabled = pythonOlder "3.6";

  src = fetchPypi {
    inherit version;
    # The fetch uses the capitalised name "Scrapy", which differs from the
    # lower-case `pname` of this package.
    pname = "Scrapy";
    sha256 = "56fd55a59d0f329ce752892358abee5a6b50b4fc55a40420ea317dc617553827";
  };

  # installShellFiles supplies the `installManPage` helper used in postInstall.
  nativeBuildInputs = [
    installShellFiles
  ];

  # Runtime Python dependencies.
  propagatedBuildInputs = [
    cryptography
    cssselect
    itemadapter
    itemloaders
    lxml
    parsel
    protego
    pydispatcher
    pyopenssl
    queuelib
    service-identity
    tldextract
    twisted
    w3lib
    zope_interface
  ];

  # Dependencies that are only needed while running the test suite.
  checkInputs = [
    botocore
    glibcLocales
    jmespath
    pytestCheckHook
    sybil
    testfixtures
  ];

  # Build and tests run under a UTF-8 locale; glibcLocales in checkInputs is
  # presumably what provides it -- confirm before dropping either.
  LC_ALL = "en_US.UTF-8";

  preCheck = ''
    # Disable doctest plugin because it causes pytest to hang
    substituteInPlace pytest.ini \
      --replace "--doctest-modules" ""
  '';

  # Test files and directories that are skipped entirely.
  disabledTestPaths = [
    "tests/test_proxy_connect.py"
    "tests/test_utils_display.py"
    "tests/test_command_check.py"
    # Don't test the documentation
    "docs"
  ];

  # Individual tests and test classes that are skipped, grouped by reason.
  disabledTests = [
    # It's unclear if the failures are related to libxml2, https://github.com/NixOS/nixpkgs/pull/123890
    "test_nested_css"
    "test_nested_xpath"
    "test_flavor_detection"
    # Requires network access
    "AnonymousFTPTestCase"
    "FTPFeedStorageTest"
    "FeedExportTest"
    "test_custom_asyncio_loop_enabled_true"
    "test_custom_loop_asyncio"
    "test_custom_loop_asyncio_deferred_signal"
    "FileFeedStoragePreFeedOptionsTest" # https://github.com/scrapy/scrapy/issues/5157
    "test_timeout_download_from_spider_nodata_rcvd"
    "test_timeout_download_from_spider_server_hangs"
    # Fails with AssertionError
    "test_peek_fifo"
    "test_peek_one_element"
    "test_peek_lifo"
    "test_callback_kwargs"
  ] ++ lib.optionals stdenv.isDarwin [
    # Additionally skipped on Darwin only.
    "test_xmliter_encoding"
    "test_download"
  ];

  # Install the man page plus the bash and zsh completion scripts shipped in
  # the source tree's extras/ directory.
  postInstall = ''
    installManPage extras/scrapy.1
    install -m 644 -D extras/scrapy_bash_completion $out/share/bash-completion/completions/scrapy
    install -m 644 -D extras/scrapy_zsh_completion $out/share/zsh/site-functions/_scrapy
  '';

  # Smoke test: the installed package must be importable as `scrapy`.
  pythonImportsCheck = [
    "scrapy"
  ];

  # Allow local networking inside the Darwin build sandbox (presumably needed
  # by parts of the test suite -- confirm before removing).
  __darwinAllowLocalNetworking = true;

  meta = with lib; {
    description = "High-level web crawling and web scraping framework";
    longDescription = ''
      Scrapy is a fast high-level web crawling and web scraping framework, used to crawl
      websites and extract structured data from their pages. It can be used for a wide
      range of purposes, from data mining to monitoring and automated testing.
    '';
    homepage = "https://scrapy.org/";
    changelog = "https://github.com/scrapy/scrapy/raw/${version}/docs/news.rst";
    license = licenses.bsd3;
    maintainers = with maintainers; [ drewkett marsam ];
    platforms = platforms.unix;
  };
}