# Package expression for jieba, built from the upstream GitHub repository
# at the tag matching `version`.
{ lib, buildPythonPackage, fetchFromGitHub, glibcLocales, python, isPy3k }:

buildPythonPackage rec {
  pname = "jieba";
  version = "0.42.1";

  # Fetched from GitHub rather than PyPI: there are no tests in the PyPI tarball.
  src = fetchFromGitHub {
    owner = "fxsjy";
    repo = pname;
    rev = "v${version}";
    sha256 = "028vmd6sj6wn9l1ilw7qfmlpyiysnlzdgdlhwxs6j4fvq0gyrwxk";
  };

  # Locale data for the checks, which export LC_ALL=en_US.UTF-8 below.
  checkInputs = [ glibcLocales ];

  # Only run the checks on Python 3; the original note here cites a
  # UnicodeEncodeError (presumably raised on Python 2 -- TODO confirm).
  doCheck = isPy3k;

  # Citing https://github.com/fxsjy/jieba/issues/384: "testcases is in a mess".
  # So only a hand-picked subset of test scripts that currently work is run,
  # directly with the Python interpreter used for the build.
  checkPhase = ''
    # Run the standard pre/post hooks so overrides that attach to them still fire.
    runHook preCheck

    export LC_ALL=en_US.UTF-8
    ${python.interpreter} test/test.py
    ${python.interpreter} test/test_tokenize.py

    runHook postCheck
  '';

  meta = with lib; {
    description = "Chinese Words Segmentation Utilities";
    homepage = "https://github.com/fxsjy/jieba";
    license = licenses.mit;
    maintainers = teams.tts.members;
  };
}