# Package expression for the `wordfreq` Python library.
#
# All arguments are supplied by the caller; the Python dependencies listed
# here are forwarded to the package via `propagatedBuildInputs`, and `pytest`
# is only used while running the test suite.
{ lib
, buildPythonPackage
, regex
, langcodes
, ftfy
, msgpack
, mecab-python3
, jieba
, pytest
, pythonOlder
, fetchFromGitHub
}:

buildPythonPackage rec {
  pname = "wordfreq";
  version = "2.3.2";
  # Not built for interpreters older than Python 3.
  disabled = pythonOlder "3";

  src = fetchFromGitHub {
    owner = "LuminosoInsight";
    repo = "wordfreq";
    # Fetch the upstream ref named "v<version>", so bumping `version`
    # (together with `sha256`) is enough to update the source.
    rev = "v${version}";
    sha256 = "078657iiksrqzcc2wvwhiilf3xxq5vlinsv0kz03qzqr1qyvbmas";
  };

  propagatedBuildInputs = [ regex langcodes ftfy msgpack mecab-python3 jieba ];

  # Relax the exact `regex ==` pin in setup.py to a lower bound (`regex >=`)
  # so that upgrading the `regex` package does not break this build.
  postPatch = ''
    substituteInPlace setup.py --replace "regex ==" "regex >="
  '';

  checkInputs = [ pytest ];

  # The phase is overridden to deselect some tests, so the pre/post hooks are
  # invoked explicitly; otherwise a `preCheck`/`postCheck` added through an
  # override would never run.
  checkPhase = ''
    runHook preCheck

    # These languages require additional dictionaries
    pytest tests -k 'not test_japanese and not test_korean and not test_languages and not test_french_and_related'

    runHook postCheck
  '';

  meta = with lib; {
    description = "A library for looking up the frequencies of words in many languages, based on many sources of data";
    homepage = "https://github.com/LuminosoInsight/wordfreq/";
    license = licenses.mit;
    maintainers = with maintainers; [ ixxie ];
  };
}