1{ lib
2, stdenv
3, botocore
4, buildPythonPackage
5, cryptography
6, cssselect
7, fetchPypi
8, fetchpatch
9, glibcLocales
10, installShellFiles
11, itemadapter
12, itemloaders
13, jmespath
14, lxml
15, packaging
16, parsel
17, pexpect
18, protego
19, pydispatcher
20, pyopenssl
21, pytestCheckHook
22, pythonOlder
23, queuelib
24, service-identity
25, sybil
26, testfixtures
27, tldextract
28, twisted
29, w3lib
30, zope_interface
31}:
32
buildPythonPackage rec {
  pname = "scrapy";
  version = "2.11.0";
  format = "setuptools";

  disabled = pythonOlder "3.8";

  src = fetchPypi {
    inherit version;
    # The sdist is published on PyPI under the capitalized name.
    pname = "Scrapy";
    hash = "sha256-PL3tzgw/DgSC1hvi10WGg758188UsO5q37rduA9bNqU=";
  };

  patches = [
    # Fix compatibility with Twisted>=23.8. Remove with the next release.
    (fetchpatch {
      url = "https://github.com/scrapy/scrapy/commit/aa95ada42cdf570f840f55c463375f8a81b303f8.patch";
      hash = "sha256-LuhA5BqtjSUgkotplvUCtvGNYOTrl0MJRCXiSBMDFzY=";
      excludes = [
        "tests/CrawlerProcess/sleeping.py"
        "tests/test_crawler.py"
      ];
    })
  ];

  nativeBuildInputs = [
    installShellFiles
  ];

  propagatedBuildInputs = [
    cryptography
    cssselect
    itemadapter
    itemloaders
    lxml
    packaging
    parsel
    protego
    pydispatcher
    pyopenssl
    queuelib
    service-identity
    tldextract
    twisted
    w3lib
    zope_interface
  ];

  nativeCheckInputs = [
    botocore
    glibcLocales
    jmespath
    pexpect
    pytestCheckHook
    sybil
    testfixtures
  ];

  # glibcLocales is in nativeCheckInputs; presumably some tests need a
  # UTF-8 locale — verify before removing.
  LC_ALL = "en_US.UTF-8";

  disabledTestPaths = [
    "tests/test_proxy_connect.py"
    "tests/test_utils_display.py"
    "tests/test_command_check.py"
    # Don't test the documentation
    "docs"
  ];

  disabledTests = [
    # It's unclear if the failures are related to libxml2, https://github.com/NixOS/nixpkgs/pull/123890
    "test_nested_css"
    "test_nested_xpath"
    "test_flavor_detection"
    "test_follow_whitespace"
    # Requires network access
    "AnonymousFTPTestCase"
    "FTPFeedStorageTest"
    "FeedExportTest"
    "test_custom_asyncio_loop_enabled_true"
    "test_custom_loop_asyncio"
    "test_custom_loop_asyncio_deferred_signal"
    "FileFeedStoragePreFeedOptionsTest" # https://github.com/scrapy/scrapy/issues/5157
    "test_persist"
    "test_timeout_download_from_spider_nodata_rcvd"
    "test_timeout_download_from_spider_server_hangs"
    "test_unbounded_response"
    "CookiesMiddlewareTest"
    # Depends on uvloop
    "test_asyncio_enabled_reactor_different_loop"
    "test_asyncio_enabled_reactor_same_loop"
    # Fails with AssertionError
    "test_peek_fifo"
    "test_peek_one_element"
    "test_peek_lifo"
    "test_callback_kwargs"
    # Test fails on Hydra
    "test_start_requests_laziness"
  ] ++ lib.optionals stdenv.isDarwin [
    "test_xmliter_encoding"
    "test_download"
    "test_reactor_default_twisted_reactor_select"
    "URIParamsSettingTest"
    "URIParamsFeedOptionTest"
    # flaky on darwin-aarch64
    "test_fixed_delay"
    "test_start_requests_laziness"
  ];

  # The sdist ships a man page and shell completions under extras/; install them.
  postInstall = ''
    installManPage extras/scrapy.1
    installShellCompletion --cmd scrapy \
      --zsh extras/scrapy_zsh_completion \
      --bash extras/scrapy_bash_completion
  '';

  pythonImportsCheck = [
    "scrapy"
  ];

  __darwinAllowLocalNetworking = true;

  meta = {
    description = "High-level web crawling and web scraping framework";
    longDescription = ''
      Scrapy is a fast high-level web crawling and web scraping framework, used to crawl
      websites and extract structured data from their pages. It can be used for a wide
      range of purposes, from data mining to monitoring and automated testing.
    '';
    homepage = "https://scrapy.org/";
    changelog = "https://github.com/scrapy/scrapy/raw/${version}/docs/news.rst";
    license = lib.licenses.bsd3;
    maintainers = with lib.maintainers; [ marsam ];
  };
}