nixpkgs mirror (for testing)
github.com/NixOS/nixpkgs
nix
{
  lib,
  buildPythonPackage,
  colorlog,
  dataclasses-json,
  fetchPypi,
  nltk-data,
  numpy,
  pandas,
  poetry-core,
  pydantic,
  pydateinfer,
  python-dateutil,
  scipy,
  symlinkJoin,
  type-infer,
}:
let
  # Merge the NLTK corpora listed below into a single directory tree so that
  # one path can be handed to the build through NLTK_DATA.
  testNltkData = symlinkJoin {
    name = "nltk-test-data";
    paths = [
      nltk-data.punkt
      nltk-data.stopwords
    ];
  };
in
buildPythonPackage rec {
  pname = "dataprep-ml";
  version = "25.2.3.0";
  pyproject = true;

  # using PyPI as github repo does not contain tags or release branches
  src = fetchPypi {
    # The PyPI project name uses an underscore, unlike the attribute pname.
    pname = "dataprep_ml";
    inherit version;
    hash = "sha256-pULqrPTxGtBLRsKCpSsP3a/QA0O5eXOP6BSI5TbCQWY=";
  };

  # Relax the version constraints declared for these dependencies.
  pythonRelaxDeps = [
    "pydantic"
    "numpy"
  ];

  # Build-time only Python dependencies (the build backend) for a
  # `pyproject = true` package.
  build-system = [
    poetry-core
  ];

  # Runtime Python dependencies.
  dependencies = [
    colorlog
    dataclasses-json
    numpy
    pandas
    pydantic
    pydateinfer
    python-dateutil
    scipy
    type-infer
  ];

  # PyPI tarball has no tests
  doCheck = false;

  # Package import requires NLTK data to be downloaded.
  # Setting it through `env` is the only way to make the NLTK_DATA
  # environment variable available in pythonImportsCheck.
  env.NLTK_DATA = testNltkData;

  pythonImportsCheck = [
    "dataprep_ml"
    "dataprep_ml.cleaners"
    "dataprep_ml.helpers"
    "dataprep_ml.imputers"
    "dataprep_ml.insights"
    "dataprep_ml.recommenders"
    "dataprep_ml.splitters"
  ];

  meta = {
    description = "Data utilities for Machine Learning pipelines";
    homepage = "https://github.com/mindsdb/dataprep_ml";
    license = lib.licenses.gpl3Only;
    maintainers = [ ];
  };
}