# Package definition for `dataprep-ml`, built from the PyPI source distribution.
#
# The expression is a function over its dependencies (intended to be filled in
# by `callPackage`) and evaluates to a `buildPythonPackage` derivation.
{
  lib,
  buildPythonPackage,
  colorlog,
  dataclasses-json,
  fetchPypi,
  nltk-data,
  numpy,
  pandas,
  poetry-core,
  pydantic,
  pydateinfer,
  python-dateutil,
  pythonOlder,
  pythonRelaxDepsHook,
  scipy,
  symlinkJoin,
  type-infer,
}:
let
  # Single tree combining the NLTK datasets needed when the package is
  # imported; exposed to the import check below through NLTK_DATA.
  testNltkData = symlinkJoin {
    name = "nltk-test-data";
    paths = [
      nltk-data.punkt
      nltk-data.stopwords
    ];
  };
in
buildPythonPackage rec {
  pname = "dataprep-ml";
  version = "24.5.1.2";
  pyproject = true;

  disabled = pythonOlder "3.8";

  # Fetched from PyPI because the GitHub repository has no tags or release
  # branches. Note that the PyPI project name uses an underscore.
  src = fetchPypi {
    pname = "dataprep_ml";
    inherit version;
    hash = "sha256-pZhHlNcQJLBww7ur2Z6Yb2IdbRsBtjzQAzfa4UzGKt4=";
  };

  # Loosen the version constraint on pydantic declared by the package metadata.
  pythonRelaxDeps = [ "pydantic" ];

  nativeBuildInputs = [
    poetry-core
    pythonRelaxDepsHook
  ];

  propagatedBuildInputs = [
    colorlog
    dataclasses-json
    numpy
    pandas
    pydantic
    pydateinfer
    python-dateutil
    scipy
    type-infer
  ];

  # The PyPI tarball does not ship any tests.
  doCheck = false;

  # Importing the package requires NLTK data to be present. Setting NLTK_DATA
  # through `env` is the only way to make the variable visible to
  # pythonImportsCheck.
  env.NLTK_DATA = testNltkData;
  pythonImportsCheck = [
    "dataprep_ml"
    "dataprep_ml.cleaners"
    "dataprep_ml.helpers"
    "dataprep_ml.imputers"
    "dataprep_ml.insights"
    "dataprep_ml.recommenders"
    "dataprep_ml.splitters"
  ];

  # `lib` is referenced explicitly instead of `with lib;` so that no unrelated
  # names from the library are brought into scope.
  meta = {
    description = "Data utilities for Machine Learning pipelines";
    homepage = "https://github.com/mindsdb/dataprep_ml";
    license = lib.licenses.gpl3Only;
    maintainers = with lib.maintainers; [ mbalatsko ];
  };
}