# Package definition for the word2vec Python bindings, built from the PyPI
# source release. Disabled on Python 2.7.
{ lib
, buildPythonPackage
, fetchPypi
, fetchzip
, cython
, numpy
, scikit-learn
, six
, setuptools-scm
, gcc
, pytest
, pytest-cov
, isPy27
}:

let
  # Archive unpacked for the test suite only; the check phase reads the
  # `text8` file out of it. Pinned by hash.
  testData = fetchzip {
    url = "http://mattmahoney.net/dc/text8.zip";
    sha256 = "0w3l64bww9znmmvd9cqbfmh3dddnlrjicz43y5qq6fhi9cfqjfar";
  };
in
buildPythonPackage rec {
  pname = "word2vec";
  version = "0.11.1";
  disabled = isPy27;

  src = fetchPypi {
    inherit pname version;
    sha256 = "222d8ffb47f385c43eba45e3f308e605fc9736b2b7137d74979adf1a31e7c8b4";
  };

  nativeBuildInputs = [ setuptools-scm gcc ];

  propagatedBuildInputs = [ cython numpy scikit-learn six ];

  checkInputs = [ pytest pytest-cov ];

  # Checks require test data downloaded separately
  # See project source Makefile:test-data rule for reference
  checkPhase = ''
    # Run the standard hooks so preCheck/postCheck set by overrides or
    # setup hooks still take effect with this custom phase.
    runHook preCheck

    # Put the freshly installed executables on PATH for the test run
    # (presumably the tests invoke them; verify against the test suite).
    PATH=$PATH:$out/bin

    # The tests read data/text8-small: the first 100000 bytes of text8.
    mkdir data
    head -c 100000 ${testData}/text8 > data/text8-small
    pytest

    runHook postCheck
  '';

  meta = with lib; {
    description = "Tool for computing continuous distributed representations of words";
    homepage = "https://github.com/danielfrg/word2vec";
    license = licenses.asl20;
    maintainers = with maintainers; [ NikolaMandic ];
  };
}