# Package definition for jieba, built from the upstream GitHub repository.
#
# Arguments (supplied by the Python package set via callPackage):
#   lib, buildPythonPackage, fetchFromGitHub - standard nixpkgs helpers
#   glibcLocales - made available during the check phase
#   python       - the interpreter used to run the test scripts
#   isPy3k       - whether the package is being built for Python 3
{ lib, buildPythonPackage, fetchFromGitHub, glibcLocales, python, isPy3k }:

buildPythonPackage rec {
  pname = "jieba";
  version = "0.39";

  # The PyPI tarball ships no tests, so fetch the tagged source from GitHub.
  src = fetchFromGitHub {
    owner = "fxsjy";
    repo = pname;
    rev = "v${version}";
    sha256 = "0hbq0l1jbgcvm58qg4p37im4jl5a9igvq3wlhlk22pmbkbvqqgzs";
  };

  # Presumably needed so the en_US.UTF-8 locale exported in checkPhase exists.
  checkInputs = [ glibcLocales ];

  # Tests are only enabled for Python 3; the original note for this setting
  # is "UnicodeEncodeError" (presumably what happens on Python 2).
  doCheck = isPy3k;

  # Citing https://github.com/fxsjy/jieba/issues/384: "testcases is in a mess"
  # So only a hand-picked subset of test scripts that currently work is run.
  checkPhase = ''
    export LC_ALL=en_US.UTF-8
    ${python.interpreter} test/test.py
    ${python.interpreter} test/test_tokenize.py
  '';

  meta = with lib; {
    description = "Chinese Words Segmentation Utilities";
    # Quoted string: bare URL literals are deprecated (Nix RFC 45).
    homepage = "https://github.com/fxsjy/jieba";
    license = licenses.mit;
  };
}