# Package expression for dataprep-ml, built with buildPythonPackage from the
# PyPI source distribution.
{
  lib,
  buildPythonPackage,
  colorlog,
  dataclasses-json,
  fetchPypi,
  nltk-data,
  numpy,
  pandas,
  poetry-core,
  pydantic,
  pydateinfer,
  python-dateutil,
  pythonOlder,
  pythonRelaxDepsHook,
  scipy,
  symlinkJoin,
  type-infer,
}:
let
  inherit (nltk-data) punkt stopwords;

  # Single directory combining the NLTK corpora exposed to the import check
  # through NLTK_DATA below.
  nltkImportCheckData = symlinkJoin {
    name = "nltk-test-data";
    paths = [
      punkt
      stopwords
    ];
  };
in
buildPythonPackage rec {
  pname = "dataprep-ml";
  version = "24.5.1.2";
  pyproject = true;

  disabled = pythonOlder "3.8";

  # Fetched from PyPI because the GitHub repository has neither tags nor
  # release branches to pin against.
  src = fetchPypi {
    pname = "dataprep_ml";
    inherit version;
    hash = "sha256-pZhHlNcQJLBww7ur2Z6Yb2IdbRsBtjzQAzfa4UzGKt4=";
  };

  pythonRelaxDeps = [ "pydantic" ];

  nativeBuildInputs = [
    poetry-core
    pythonRelaxDepsHook
  ];

  propagatedBuildInputs = [
    colorlog
    dataclasses-json
    numpy
    pandas
    pydantic
    pydateinfer
    python-dateutil
    scipy
    type-infer
  ];

  # The PyPI tarball ships without tests, so there is nothing to run.
  doCheck = false;

  # Importing the package requires NLTK data to be available.
  # Setting NLTK_DATA via `env` is the only way to make the variable
  # visible to pythonImportsCheck.
  env = {
    NLTK_DATA = nltkImportCheckData;
  };

  pythonImportsCheck = [
    "dataprep_ml"
    "dataprep_ml.cleaners"
    "dataprep_ml.helpers"
    "dataprep_ml.imputers"
    "dataprep_ml.insights"
    "dataprep_ml.recommenders"
    "dataprep_ml.splitters"
  ];

  meta = {
    description = "Data utilities for Machine Learning pipelines";
    homepage = "https://github.com/mindsdb/dataprep_ml";
    license = lib.licenses.gpl3Only;
    maintainers = [ lib.maintainers.mbalatsko ];
  };
}