1{
2 lib,
3 buildPythonPackage,
4 colorlog,
5 dataclasses-json,
6 fetchPypi,
7 nltk-data,
8 numpy,
9 pandas,
10 poetry-core,
11 pydantic,
12 pydateinfer,
13 python-dateutil,
14 pythonOlder,
15 scipy,
16 symlinkJoin,
17 type-infer,
18}:
19let
20 testNltkData = symlinkJoin {
21 name = "nltk-test-data";
22 paths = [
23 nltk-data.punkt
24 nltk-data.stopwords
25 ];
26 };
27in
buildPythonPackage rec {
  pname = "dataprep-ml";
  version = "24.5.1.2";
  pyproject = true;

  disabled = pythonOlder "3.8";

  # Fetched from PyPI because the GitHub repository has no tags or
  # release branches to pin against.
  src = fetchPypi {
    # The PyPI project name uses an underscore, unlike the nixpkgs pname.
    pname = "dataprep_ml";
    inherit version;
    hash = "sha256-pZhHlNcQJLBww7ur2Z6Yb2IdbRsBtjzQAzfa4UzGKt4=";
  };

  # Relax the version constraint declared for pydantic.
  pythonRelaxDeps = [ "pydantic" ];

  # PEP 517 build backend.
  build-system = [ poetry-core ];

  # Runtime Python dependencies.
  dependencies = [
    colorlog
    dataclasses-json
    numpy
    pandas
    pydantic
    pydateinfer
    python-dateutil
    scipy
    type-infer
  ];

  # The PyPI tarball ships no tests.
  doCheck = false;

  # Importing the package requires NLTK data to be available.
  # Setting NLTK_DATA through `env` is the only way to make the variable
  # visible to pythonImportsCheck.
  env.NLTK_DATA = testNltkData;

  pythonImportsCheck = [
    "dataprep_ml"
    "dataprep_ml.cleaners"
    "dataprep_ml.helpers"
    "dataprep_ml.imputers"
    "dataprep_ml.insights"
    "dataprep_ml.recommenders"
    "dataprep_ml.splitters"
  ];

  # Explicit `lib.` prefixes instead of `with lib;` keep name resolution
  # in this attribute set unambiguous.
  meta = {
    description = "Data utilities for Machine Learning pipelines";
    homepage = "https://github.com/mindsdb/dataprep_ml";
    license = lib.licenses.gpl3Only;
    maintainers = with lib.maintainers; [ mbalatsko ];
  };
}