# Package definition for SoMaJo, a tokenizer and sentence splitter for
# German and English web texts.
#
# The source is fetched from the upstream GitHub repository at the tag
# matching `version`, built as a pyproject package with setuptools, and
# depends on `regex` at runtime.
{
  lib,
  stdenv,
  fetchFromGitHub,
  buildPythonPackage,
  pythonOlder,
  setuptools,
  regex,
}:

buildPythonPackage rec {
  pname = "somajo";
  version = "2.4.3";
  pyproject = true;

  # Refuse to build for interpreters older than Python 3.8.
  disabled = pythonOlder "3.8";

  src = fetchFromGitHub {
    owner = "tsproisl";
    repo = "SoMaJo";
    # Upstream tags are the version number prefixed with "v".
    tag = "v${version}";
    hash = "sha256-fq891LX6PukUEfrXplulhnisuPX/RqLAQ/5ty/Fvm9k=";
  };

  build-system = [ setuptools ];

  dependencies = [ regex ];

  # The check phase loops forever on Darwin, so it only runs on other
  # host platforms.
  doCheck = !stdenv.hostPlatform.isDarwin;

  # Smoke test: the top-level module must be importable after installation.
  pythonImportsCheck = [ "somajo" ];

  meta = {
    description = "Tokenizer and sentence splitter for German and English web texts";
    homepage = "https://github.com/tsproisl/SoMaJo";
    changelog = "https://github.com/tsproisl/SoMaJo/blob/v${version}/CHANGES.txt";
    # Referenced explicitly through `lib` rather than `with lib;`, so no
    # extra names are brought into scope for the whole attribute set.
    license = lib.licenses.gpl3Plus;
    maintainers = [ ];
    mainProgram = "somajo-tokenizer";
  };
}