# Package definition for cleanlab, built from the upstream GitHub release tag
# with buildPythonPackage. Arguments are supplied by callPackage; everything
# listed after the "test dependencies" marker is only used in nativeCheckInputs.
{ lib
, buildPythonPackage
, fetchFromGitHub
, pythonOlder
, scikit-learn
, termcolor
, tqdm
, pandas
, setuptools
# test dependencies
, pytestCheckHook
, pytest-lazy-fixture
, tensorflow
, torch
, datasets
, torchvision
, keras
, fasttext
, hypothesis
, wget
, matplotlib
, skorch
}:

buildPythonPackage rec {
  pname = "cleanlab";
  version = "2.5.0";
  pyproject = true;

  disabled = pythonOlder "3.7";

  src = fetchFromGitHub {
    owner = "cleanlab";
    # Spelled out rather than reusing pname, so renaming the package attribute
    # can never silently change which repository is fetched.
    repo = "cleanlab";
    rev = "refs/tags/v${version}";
    hash = "sha256-5XQQVrhjpvjwtFM79DqttObmw/GQLkMQVXb5jhiC8e0=";
  };

  nativeBuildInputs = [ setuptools ];

  propagatedBuildInputs = [
    scikit-learn
    termcolor
    tqdm
    pandas
  ];

  # Conflict detection is switched off only because the test suite is enabled:
  # the packages in nativeCheckInputs bring duplicate packages into the closure.
  # Affected packages: grpcio protobuf tensorboard tensorboard-plugin-profile
  #
  # NOTE: inside this `rec` set, `doCheck` below is bound to the literal `true`,
  # so overriding doCheck from outside does not flip catchConflicts back on;
  # override catchConflicts as well when disabling the tests.
  catchConflicts = !doCheck;
  doCheck = true;

  nativeCheckInputs = [
    pytestCheckHook
    pytest-lazy-fixture
    tensorflow
    torch
    datasets
    torchvision
    keras
    fasttext
    hypothesis
    wget
    matplotlib
    skorch
  ];

  disabledTests = [
    # Requires datasets that are not downloaded during the build
    "test_create_imagelab"
  ];

  disabledTestPaths = [
    # Requires internet
    "tests/test_dataset.py"
    # Requires datasets that are not downloaded during the build
    "tests/datalab/test_cleanvision_integration.py"
  ];

  # Cheap smoke test that the installed module is importable; it still runs
  # when the pytest suite is disabled through an override.
  pythonImportsCheck = [ "cleanlab" ];

  meta = {
    # nixpkgs convention: no leading article and no trailing period.
    description = "Standard data-centric AI package for data quality and machine learning with messy, real-world data and labels";
    homepage = "https://github.com/cleanlab/cleanlab";
    changelog = "https://github.com/cleanlab/cleanlab/releases/tag/v${version}";
    license = lib.licenses.agpl3Only;
    maintainers = with lib.maintainers; [ happysalada ];
  };
}