# Package definition for extruct, built from the upstream GitHub
# repository at the tag matching `version`.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  pythonRelaxDepsHook,
  html-text,
  jstyleson,
  lxml,
  mf2py,
  pyrdfa3,
  rdflib,
  six,
  w3lib,
  pytestCheckHook,
  mock,
}:

buildPythonPackage rec {
  pname = "extruct";
  version = "0.13.0";

  # Source is pinned to the release tag "v<version>" and verified by hash.
  src = fetchFromGitHub {
    owner = "scrapinghub";
    repo = "extruct";
    rev = "v${version}";
    hash = "sha256-hf6b/tZLggHzgFmZ6aldZIBd17Ni7vCTIIzhNlyjvxw=";
  };

  # The hook is what acts on `pythonRemoveDeps` below.
  nativeBuildInputs = [ pythonRelaxDepsHook ];

  # rdflib-jsonld functionality is part of rdflib from version 6 onwards,
  # so the separate requirement is dropped from the package's dependencies.
  pythonRemoveDeps = [ "rdflib-jsonld" ];

  # Python libraries propagated to consumers of this package.
  propagatedBuildInputs = [
    html-text
    jstyleson
    lxml
    mf2py
    pyrdfa3
    rdflib
    six
    w3lib
  ];

  # Inputs needed only for the check phase.
  nativeCheckInputs = [
    mock
    pytestCheckHook
  ];

  # Module name(s) handed to the import check.
  pythonImportsCheck = [ "extruct" ];

  meta = {
    description = "Extract embedded metadata from HTML markup";
    homepage = "https://github.com/scrapinghub/extruct";
    license = lib.licenses.bsd3;
    maintainers = [ lib.maintainers.ambroisie ];
  };
}