# Nix derivation for the `cleanlab` Python package, built from the upstream
# GitHub tag with setuptools and tested with pytest.
#
# Arguments are supplied by the Python package set: `lib` and the builder
# helpers first, then the runtime dependencies, then the packages that are
# needed only while running the test suite.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  pythonOlder,
  scikit-learn,
  termcolor,
  tqdm,
  pandas,
  setuptools,
  # test dependencies
  pytestCheckHook,
  pytest-lazy-fixture,
  tensorflow,
  torch,
  datasets,
  torchvision,
  keras,
  fasttext,
  hypothesis,
  wget,
  matplotlib,
  skorch,
}:

buildPythonPackage rec {
  pname = "cleanlab";
  version = "2.6.1";
  pyproject = true;
  disabled = pythonOlder "3.7";

  src = fetchFromGitHub {
    owner = "cleanlab";
    # Spelled out rather than `pname` so the fetch location does not change
    # if the attribute name is ever overridden.
    repo = "cleanlab";
    rev = "refs/tags/v${version}";
    hash = "sha256-+uJtm/t6Ri25V/9N/2fcOgCOBaBy8PrsM/tO1uX7FEY=";
  };

  nativeBuildInputs = [ setuptools ];

  propagatedBuildInputs = [
    scikit-learn
    termcolor
    tqdm
    pandas
  ];

  # The packages in nativeCheckInputs bring duplicate copies of some
  # dependencies into the closure, which the conflict check would reject.
  # Affected packages: grpcio protobuf tensorboard tensorboard-plugin-profile
  # Conflict detection is therefore skipped exactly when the tests are enabled
  # and stays active whenever doCheck is turned off.
  catchConflicts = (!doCheck);
  doCheck = true;

  nativeCheckInputs = [
    pytestCheckHook
    pytest-lazy-fixture
    tensorflow
    torch
    datasets
    torchvision
    keras
    fasttext
    hypothesis
    wget
    matplotlib
    skorch
  ];

  disabledTests = [
    # Requires datasets that cannot be downloaded during the build
    "test_create_imagelab"
  ];

  disabledTestPaths = [
    # Requires internet
    "tests/test_dataset.py"
    # Requires datasets that cannot be downloaded during the build
    "tests/datalab/test_cleanvision_integration.py"
  ];

  meta = {
    # No leading article and no trailing period, per nixpkgs conventions.
    description = "Standard data-centric AI package for data quality and machine learning with messy, real-world data and labels";
    homepage = "https://github.com/cleanlab/cleanlab";
    changelog = "https://github.com/cleanlab/cleanlab/releases/tag/v${version}";
    license = lib.licenses.agpl3Only;
    maintainers = with lib.maintainers; [ happysalada ];
  };
}