# Python package expression for word2vec, built from the PyPI sdist with
# buildPythonPackage. The test suite needs a corpus file that is not part of
# the sdist, so it is fetched as a separate fixed-output derivation
# (`testData`) and truncated during the check phase.
{ stdenv
, lib
, buildPythonPackage
, fetchPypi
, fetchzip
, cython
, numpy
, scikit-learn
, six
, setuptools-scm
, gcc
, pytest
, pytest-cov
, isPy27
}:
let
  # Unpacked text8 archive; the check phase reads `text8` from its root.
  testData = fetchzip {
    url = "http://mattmahoney.net/dc/text8.zip";
    sha256 = "0w3l64bww9znmmvd9cqbfmh3dddnlrjicz43y5qq6fhi9cfqjfar";
  };
in
buildPythonPackage rec {
  pname = "word2vec";
  version = "0.11.1";
  disabled = isPy27;

  src = fetchPypi {
    inherit pname version;
    sha256 = "222d8ffb47f385c43eba45e3f308e605fc9736b2b7137d74979adf1a31e7c8b4";
  };

  nativeBuildInputs = [ setuptools-scm gcc ];

  propagatedBuildInputs = [ cython numpy scikit-learn six ];

  checkInputs = [ pytest pytest-cov ];

  # Checks require test data downloaded separately.
  # See the project source Makefile (`test-data` rule) for reference.
  checkPhase = ''
    runHook preCheck

    # Make the installed executables reachable from the tests
    # (presumably the tests invoke them by name -- confirm against upstream).
    export PATH="$PATH:$out/bin"

    # Only the first 100000 bytes of the corpus are used, stored under the
    # file name data/text8-small.
    mkdir -p data
    head -c 100000 ${testData}/text8 > data/text8-small

    pytest

    runHook postCheck
  '';

  meta = {
    broken = stdenv.hostPlatform.isDarwin;
    description = "Tool for computing continuous distributed representations of words";
    homepage = "https://github.com/danielfrg/word2vec";
    license = lib.licenses.asl20;
    maintainers = with lib.maintainers; [ NikolaMandic ];
  };

}