# Nix package expression for Scrapy, vendored from a clone of
# https://github.com/NixOS/nixpkgs.git (used to stress-test knotserver).
# Python package derivation for Scrapy 2.9.0 (high-level web crawling /
# scraping framework). Reconstructed into valid Nix from a copy that had
# display line numbers fused into the text; content is otherwise unchanged.
{ lib
, stdenv
, botocore
, buildPythonPackage
, cryptography
, cssselect
, fetchPypi
, fetchpatch
, glibcLocales
, installShellFiles
, itemadapter
, itemloaders
, jmespath
, lxml
, packaging
, parsel
, protego
, pydispatcher
, pyopenssl
, pytestCheckHook
, pythonOlder
, queuelib
, service-identity
, sybil
, testfixtures
, tldextract
, twisted
, w3lib
, zope_interface
}:

buildPythonPackage rec {
  pname = "scrapy";
  version = "2.9.0";
  format = "setuptools";

  # Scrapy 2.9 requires Python >= 3.7.
  disabled = pythonOlder "3.7";

  src = fetchPypi {
    inherit version;
    # PyPI sdist is published under the capitalized name.
    pname = "Scrapy";
    hash = "sha256-VkyXK1blS4MUHzlc4/aiW/4gk9YdE/m4HQU4ThnbmNo=";
  };

  # Needed for installManPage / installShellCompletion in postInstall.
  nativeBuildInputs = [
    installShellFiles
  ];

  propagatedBuildInputs = [
    cryptography
    cssselect
    itemadapter
    itemloaders
    lxml
    packaging
    parsel
    protego
    pydispatcher
    pyopenssl
    queuelib
    service-identity
    tldextract
    twisted
    w3lib
    zope_interface
  ];

  nativeCheckInputs = [
    botocore
    glibcLocales
    jmespath
    pytestCheckHook
    sybil
    testfixtures
  ];

  # Some tests require a UTF-8 locale.
  LC_ALL = "en_US.UTF-8";

  disabledTestPaths = [
    "tests/test_proxy_connect.py"
    "tests/test_utils_display.py"
    "tests/test_command_check.py"
    # Don't test the documentation
    "docs"
  ];

  disabledTests = [
    # It's unclear if the failures are related to libxml2, https://github.com/NixOS/nixpkgs/pull/123890
    "test_nested_css"
    "test_nested_xpath"
    "test_flavor_detection"
    "test_follow_whitespace"
    # Requires network access
    "AnonymousFTPTestCase"
    "FTPFeedStorageTest"
    "FeedExportTest"
    "test_custom_asyncio_loop_enabled_true"
    "test_custom_loop_asyncio"
    "test_custom_loop_asyncio_deferred_signal"
    "FileFeedStoragePreFeedOptionsTest" # https://github.com/scrapy/scrapy/issues/5157
    "test_timeout_download_from_spider_nodata_rcvd"
    "test_timeout_download_from_spider_server_hangs"
    # Depends on uvloop
    "test_asyncio_enabled_reactor_different_loop"
    "test_asyncio_enabled_reactor_same_loop"
    # Fails with AssertionError
    "test_peek_fifo"
    "test_peek_one_element"
    "test_peek_lifo"
    "test_callback_kwargs"
  ] ++ lib.optionals stdenv.isDarwin [
    "test_xmliter_encoding"
    "test_download"
    "test_reactor_default_twisted_reactor_select"
    "URIParamsSettingTest"
    "URIParamsFeedOptionTest"
    # flaky on darwin-aarch64
    "test_fixed_delay"
    "test_start_requests_laziness"
  ];

  # Install the man page and shell completions shipped in extras/.
  postInstall = ''
    installManPage extras/scrapy.1
    installShellCompletion --cmd scrapy \
      --zsh extras/scrapy_zsh_completion \
      --bash extras/scrapy_bash_completion
  '';

  pythonImportsCheck = [
    "scrapy"
  ];

  # Some tests bind to localhost; allow that inside the darwin sandbox.
  __darwinAllowLocalNetworking = true;

  meta = with lib; {
    description = "High-level web crawling and web scraping framework";
    longDescription = ''
      Scrapy is a fast high-level web crawling and web scraping framework, used to crawl
      websites and extract structured data from their pages. It can be used for a wide
      range of purposes, from data mining to monitoring and automated testing.
    '';
    homepage = "https://scrapy.org/";
    changelog = "https://github.com/scrapy/scrapy/raw/${version}/docs/news.rst";
    license = licenses.bsd3;
    maintainers = with maintainers; [ marsam ];
  };
}