{ lib
, stdenv
, buildPythonPackage
, isPy27
, fetchPypi
, glibcLocales
, pytestCheckHook
, testfixtures
, pillow
, twisted
, cryptography
, w3lib
, lxml
, queuelib
, pyopenssl
, service-identity
, parsel
, pydispatcher
, cssselect
, zope_interface
, protego
, jmespath
, sybil
, pytest-twisted
, botocore
, itemadapter
, itemloaders
}:

buildPythonPackage rec {
  pname = "Scrapy";
  version = "2.4.1";

  # Scrapy 2.x is Python 3 only.
  disabled = isPy27;

  src = fetchPypi {
    inherit pname version;
    sha256 = "68c48f01a58636bdf0f6fcd5035a19ecf277b58af24bd70c36dc6e556df3e005";
  };

  propagatedBuildInputs = [
    cryptography
    cssselect
    itemadapter
    itemloaders
    lxml
    parsel
    protego
    pydispatcher
    pyopenssl
    queuelib
    service-identity
    twisted
    w3lib
    zope_interface
  ];

  checkInputs = [
    botocore
    glibcLocales
    jmespath
    pillow
    pytest-twisted
    pytestCheckHook
    sybil
    testfixtures
  ];

  # The test suite needs a UTF-8 locale.
  LC_ALL = "en_US.UTF-8";

  # Disable doctest plugin because it causes pytest to hang
  preCheck = ''
    substituteInPlace pytest.ini --replace "--doctest-modules" ""
  '';

  pytestFlagsArray = [
    "--ignore=tests/test_proxy_connect.py"
    "--ignore=tests/test_utils_display.py"
    "--ignore=tests/test_command_check.py"
  ];

  # Tests that require network access or an asyncio loop configuration that
  # is unavailable in the sandbox; the last two are additionally flaky on darwin.
  disabledTests = [
    "FTPFeedStorageTest"
    "test_noconnect"
    "test_retry_dns_error"
    "test_custom_asyncio_loop_enabled_true"
    "test_custom_loop_asyncio"
  ] ++ lib.optionals stdenv.isDarwin [
    "test_xmliter_encoding"
    "test_download"
  ];

  # Ship the man page and the shell completions provided under extras/.
  postInstall = ''
    install -m 644 -D extras/scrapy.1 $out/share/man/man1/scrapy.1
    install -m 644 -D extras/scrapy_bash_completion $out/share/bash-completion/completions/scrapy
    install -m 644 -D extras/scrapy_zsh_completion $out/share/zsh/site-functions/_scrapy
  '';

  # NOTE(review): presumably the test suite binds localhost sockets; permit
  # that inside the darwin sandbox.
  __darwinAllowLocalNetworking = true;

  meta = {
    description = "A fast high-level web crawling and web scraping framework, used to crawl websites and extract structured data from their pages";
    homepage = "https://scrapy.org/";
    license = lib.licenses.bsd3;
    maintainers = with lib.maintainers; [ drewkett marsam ];
    platforms = lib.platforms.unix;
  };
}