Clone of https://github.com/NixOS/nixpkgs.git (to stress-test knotserver)
# Nix package expression for the `dataprep-ml` Python library, built from the
# PyPI sdist with poetry-core as the PEP 517 build backend.
{
  lib,
  buildPythonPackage,
  colorlog,
  dataclasses-json,
  fetchPypi,
  nltk-data,
  numpy,
  pandas,
  poetry-core,
  pydantic,
  pydateinfer,
  python-dateutil,
  pythonOlder,
  scipy,
  symlinkJoin,
  type-infer,
}:
let
  # The NLTK datasets needed at import time, joined into one directory tree so
  # that a single NLTK_DATA path can reference both of them.
  testNltkData = symlinkJoin {
    name = "nltk-test-data";
    paths = [
      nltk-data.punkt
      nltk-data.stopwords
    ];
  };
in
buildPythonPackage rec {
  pname = "dataprep-ml";
  version = "24.5.1.2";
  pyproject = true;

  disabled = pythonOlder "3.8";

  # Fetch from PyPI: the GitHub repository has no tags or release branches.
  # Note the sdist name uses an underscore, unlike the Nix pname.
  src = fetchPypi {
    pname = "dataprep_ml";
    inherit version;
    hash = "sha256-pZhHlNcQJLBww7ur2Z6Yb2IdbRsBtjzQAzfa4UzGKt4=";
  };

  # Drop the upstream version constraint on pydantic.
  pythonRelaxDeps = [ "pydantic" ];

  # PEP 517 build backend (pyproject = true).
  build-system = [
    poetry-core
  ];

  # Runtime Python dependencies.
  dependencies = [
    colorlog
    dataclasses-json
    numpy
    pandas
    pydantic
    pydateinfer
    python-dateutil
    scipy
    type-infer
  ];

  # The PyPI tarball ships no tests.
  doCheck = false;

  # Importing the package requires NLTK data to be present. Exporting
  # NLTK_DATA through `env` is the only way to make it visible to
  # pythonImportsCheck.
  env.NLTK_DATA = testNltkData;

  pythonImportsCheck = [
    "dataprep_ml"
    "dataprep_ml.cleaners"
    "dataprep_ml.helpers"
    "dataprep_ml.imputers"
    "dataprep_ml.insights"
    "dataprep_ml.recommenders"
    "dataprep_ml.splitters"
  ];

  meta = {
    description = "Data utilities for Machine Learning pipelines";
    homepage = "https://github.com/mindsdb/dataprep_ml";
    license = lib.licenses.gpl3Only;
    maintainers = with lib.maintainers; [ mbalatsko ];
  };
}