# Package definition for SoMaJo, built from the upstream GitHub sources
# with buildPythonPackage in pyproject mode.
{
  lib,
  stdenv,
  fetchFromGitHub,
  buildPythonPackage,
  pythonOlder,
  setuptools,
  regex,
}:

buildPythonPackage rec {
  pname = "somajo";
  version = "2.4.2";
  pyproject = true;

  # Not built for Python interpreters older than 3.7.
  disabled = pythonOlder "3.7";

  # Source is fetched from the upstream release tag matching `version`.
  src = fetchFromGitHub {
    owner = "tsproisl";
    repo = "SoMaJo";
    rev = "refs/tags/v${version}";
    hash = "sha256-5rlgDnPYTtuVMincG5CgVwNh/IGmZk6ItvzdB/wHmgg=";
  };

  # Build backend used for the pyproject build.
  build-system = [ setuptools ];

  # Python dependencies propagated to consumers of this package.
  dependencies = [ regex ];

  # The test suite loops forever on Darwin, so checks run only on other platforms.
  doCheck = !stdenv.hostPlatform.isDarwin;

  # Smoke test: the top-level module must be importable after installation.
  pythonImportsCheck = [ "somajo" ];

  meta = {
    description = "Tokenizer and sentence splitter for German and English web texts";
    mainProgram = "somajo-tokenizer";
    homepage = "https://github.com/tsproisl/SoMaJo";
    license = lib.licenses.gpl3Plus;
    maintainers = [ ];
  };
}