# Nix expression for the Scrapy Python package (version 2.3.0), built with
# buildPythonPackage from the sdist published on PyPI.
#
# All arguments are supplied by the caller: stdenv and lib, the Python build
# helpers (buildPythonPackage, fetchPypi, isPy27), and the Python packages
# listed in the input lists below.
{ stdenv
, buildPythonPackage
, isPy27
, fetchPypi
, glibcLocales
, pytest
, testfixtures
, pillow
, twisted
, cryptography
, w3lib
, lxml
, queuelib
, pyopenssl
, service-identity
, parsel
, pydispatcher
, cssselect
, zope_interface
, protego
, lib
, jmespath
, sybil
, pytest-twisted
, botocore
, itemadapter
, itemloaders
}:

buildPythonPackage rec {
  pname = "Scrapy";
  version = "2.3.0";

  # Not built for Python 2.7 interpreters.
  disabled = isPy27;

  src = fetchPypi {
    inherit pname version;
    sha256 = "b4d08cdacb615563c291d053ef1ba2dc08d9d4b6d81578684eaa1cf7b832f90c";
  };

  # Dependencies propagated to anything that depends on this package.
  propagatedBuildInputs = [
    twisted
    cryptography
    cssselect
    lxml
    parsel
    pydispatcher
    pyopenssl
    queuelib
    service-identity
    w3lib
    zope_interface
    protego
    itemadapter
    itemloaders
  ];

  # Dependencies needed only while running the test suite in checkPhase.
  checkInputs = [
    glibcLocales
    jmespath
    pytest
    sybil
    testfixtures
    pillow
    pytest-twisted
    botocore
  ];

  # Exported into the build environment; glibcLocales (in checkInputs) is
  # presumably what provides this locale -- confirm if changing either.
  LC_ALL = "en_US.UTF-8";

  # Test-suite adjustments:
  # - The doctest plugin, enabled in the shipped pytest.ini, is disabled
  #   because it causes pytest to hang.
  # - tests/test_proxy_connect.py is ignored because it requires mitmproxy.
  # - tests/test_utils_display.py is ignored because it requires pygments.
  # - test_retry_dns_error is deselected because it tries to resolve an
  #   invalid DNS name and fails with "Reactor was unclean".
  # - On Darwin, the XML encoding test is deselected because lxml cannot find
  #   the encodings: https://bugs.launchpad.net/lxml/+bug/707396
  # - NOTE(review): no reason is recorded for ignoring
  #   tests/test_linkextractors_deprecated.py -- confirm it is still needed.
  #
  # The phase is overridden as a whole, so the pre/post hooks are invoked
  # explicitly; otherwise preCheck/postCheck set via overrides would be skipped.
  checkPhase = ''
    runHook preCheck

    substituteInPlace pytest.ini --replace "--doctest-modules" ""
    pytest \
      --ignore=tests/test_linkextractors_deprecated.py \
      --ignore=tests/test_proxy_connect.py \
      --ignore=tests/test_utils_display.py \
      --deselect tests/test_crawl.py::CrawlTestCase::test_retry_dns_error \
      ${lib.optionalString stdenv.isDarwin "--deselect tests/test_utils_iterators.py::LxmlXmliterTestCase::test_xmliter_encoding"}

    runHook postCheck
  '';

  # Install the man page and shell completions shipped in the sdist's extras/.
  postInstall = ''
    install -m 644 -D extras/scrapy.1 $out/share/man/man1/scrapy.1
    install -m 644 -D extras/scrapy_bash_completion $out/share/bash-completion/completions/scrapy
    install -m 644 -D extras/scrapy_zsh_completion $out/share/zsh/site-functions/_scrapy
  '';

  meta = with lib; {
    description = "A fast high-level web crawling and web scraping framework, used to crawl websites and extract structured data from their pages";
    homepage = "https://scrapy.org/";
    license = licenses.bsd3;
    maintainers = with maintainers; [ drewkett marsam ];
    platforms = platforms.unix;
  };
}