# Package definition for jieba, built from the upstream GitHub repository
# with the setuptools-based Python builder.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  glibcLocales,
  python,
  isPy3k,
}:

buildPythonPackage rec {
  pname = "jieba";
  version = "0.42.1";
  format = "setuptools";

  # Fetched from GitHub rather than PyPI: there are no tests in the PyPI tarball.
  src = fetchFromGitHub {
    owner = "fxsjy";
    repo = "jieba";
    rev = "v${version}";
    sha256 = "028vmd6sj6wn9l1ilw7qfmlpyiysnlzdgdlhwxs6j4fvq0gyrwxk";
  };

  # Provides the en_US.UTF-8 locale that checkPhase selects via LC_ALL.
  nativeCheckInputs = [ glibcLocales ];

  # Tests are only run on Python 3; elsewhere they fail with UnicodeEncodeError.
  doCheck = isPy3k;

  # Citing https://github.com/fxsjy/jieba/issues/384: "testcases is in a mess".
  # So only a hand-picked pair of test scripts that currently work is run.
  checkPhase = ''
    runHook preCheck

    export LC_ALL=en_US.UTF-8
    ${python.interpreter} test/test.py
    ${python.interpreter} test/test_tokenize.py

    runHook postCheck
  '';

  # Cheap smoke test that still applies when doCheck is false.
  pythonImportsCheck = [ "jieba" ];

  meta = {
    description = "Chinese Words Segmentation Utilities";
    homepage = "https://github.com/fxsjy/jieba";
    license = lib.licenses.mit;
    maintainers = lib.teams.tts.members;
  };
}