{ lib
, buildPythonPackage
, fetchPypi
, pythonOlder
, poetry-core
, numpy
, pandas
, pydateinfer
, python-dateutil
, scipy
, type-infer
, dataclasses-json
, colorlog
, pydantic
, nltk-data
, symlinkJoin
}:
let
  testNltkData = symlinkJoin {
    name = "nltk-test-data";
    paths = [ nltk-data.punkt nltk-data.stopwords ];
  };
in
buildPythonPackage rec {
  pname = "dataprep-ml";
  version = "0.0.18";
  pyproject = true;

  disabled = pythonOlder "3.8";

  # Using PyPI, as the GitHub repo does not contain tags or release branches
  src = fetchPypi {
    pname = "dataprep_ml";
    inherit version;
    hash = "sha256-nIqyRwv62j8x5Fy7ILMLWxw6yJmkkNRE1zyUlfvRYTI=";
  };

  nativeBuildInputs = [
    poetry-core
  ];

  propagatedBuildInputs = [
    numpy
    pandas
    pydateinfer
    python-dateutil
    scipy
    type-infer
    dataclasses-json
    colorlog
    pydantic
  ];

  # PyPI tarball has no tests
  doCheck = false;

  # Importing the package requires NLTK data to be downloaded.
  # Setting env.NLTK_DATA is the only way to make the NLTK_DATA environment
  # variable available during pythonImportsCheck.
  env.NLTK_DATA = testNltkData;

  pythonImportsCheck = [
    "dataprep_ml"
    "dataprep_ml.cleaners"
    "dataprep_ml.helpers"
    "dataprep_ml.imputers"
    "dataprep_ml.insights"
    "dataprep_ml.recommenders"
    "dataprep_ml.splitters"
  ];

  meta = with lib; {
    description = "Data utilities for Machine Learning pipelines";
    homepage = "https://github.com/mindsdb/dataprep_ml";
    license = licenses.gpl3Only;
    maintainers = with maintainers; [ mbalatsko ];
  };
}
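
# A minimal usage sketch of the same NLTK-data technique outside this file:
# symlinkJoin-ed corpora exposed through NLTK_DATA so that dataprep-ml can be
# imported in a development shell. The attribute name ps.dataprep-ml and the
# mkShell wrapper below are assumptions for illustration, not part of this
# derivation.
#
#   with import <nixpkgs> { };
#   mkShell {
#     packages = [ (python3.withPackages (ps: [ ps.dataprep-ml ])) ];
#     # Same punkt + stopwords set that the import check above relies on
#     NLTK_DATA = symlinkJoin {
#       name = "nltk-data";
#       paths = [ nltk-data.punkt nltk-data.stopwords ];
#     };
#   }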