# Package expression for wordfreq, built with buildPythonPackage from a pinned
# GitHub revision. All arguments are expected to be supplied by the caller
# (typically the Python package set via callPackage).
{ lib
, buildPythonPackage
, regex
, langcodes
, ftfy
, msgpack
, mecab-python3
, jieba
, pytest
, pythonOlder
, fetchFromGitHub
}:

buildPythonPackage {
  pname = "wordfreq";
  version = "2.2.0";

  src = fetchFromGitHub {
    owner = "LuminosoInsight";
    repo = "wordfreq";
    # Upstream does not tag releases by version, so pin an exact commit.
    rev = "bc12599010c8181a725ec97d0b3990758a48da36";
    sha256 = "195794vkzq5wsq3mg1dgfhlnz2f7vi1xajlifq6wkg4lzwyq262m";
  };

  checkInputs = [ pytest ];

  # Custom check phase: run the test suite minus the deselected tests below.
  # preCheck/postCheck hooks are run explicitly because overriding the phase
  # replaces the default implementation that would otherwise invoke them.
  checkPhase = ''
    runHook preCheck
    # These languages require additional dictionaries
    pytest tests -k 'not test_japanese and not test_korean and not test_languages and not test_french_and_related'
    runHook postCheck
  '';

  propagatedBuildInputs = [ regex langcodes ftfy msgpack mecab-python3 jieba ];

  # Relax the exact version pin on the regex dependency in setup.py to a
  # lower bound, so that upgrading regex does not break this package.
  postPatch = ''
    substituteInPlace setup.py --replace "regex ==" "regex >="
  '';

  # Do not build for Python versions older than 3.
  disabled = pythonOlder "3";

  meta = with lib; {
    description = "A library for looking up the frequencies of words in many languages, based on many sources of data";
    # Quoted string: bare URL literals are deprecated Nix syntax.
    homepage = "https://github.com/LuminosoInsight/wordfreq/";
    license = licenses.mit;
    maintainers = with maintainers; [ ixxie ];
  };
}