# Package expression for torchsnapshot: builds the tagged upstream GitHub
# source as a pyproject-based Python package using setuptools.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,

  # build-system
  setuptools,

  # dependencies
  aiofiles,
  aiohttp,
  importlib-metadata,
  nest-asyncio,
  psutil,
  pyyaml,
  torch,
  typing-extensions,

  # tests
  pytest-asyncio,
  pytestCheckHook,
}:

buildPythonPackage rec {
  pname = "torchsnapshot";
  version = "0.1.0";
  pyproject = true;

  # The upstream tag is the bare version string.
  src = fetchFromGitHub {
    owner = "pytorch";
    repo = "torchsnapshot";
    tag = version;
    hash = "sha256-F8OaxLH8BL6MPNLFv1hBuVmeEdnEQ5w2Qny6by1wP6k=";
  };

  build-system = [ setuptools ];

  dependencies = [
    aiofiles
    aiohttp
    importlib-metadata
    nest-asyncio
    psutil
    pyyaml
    torch
    typing-extensions
  ];

  nativeCheckInputs = [
    pytest-asyncio
    pytestCheckHook
  ];

  disabledTests = [
    # Fails with:
    #   torch.distributed.elastic.multiprocessing.errors.ChildFailedError:
    #   AssertionError: "Socket Timeout" does not match "wait timeout after 5000ms
    "test_linear_barrier_timeout"
  ];

  pythonImportsCheck = [ "torchsnapshot" ];

  meta = {
    description = "Performant, memory-efficient checkpointing library for PyTorch applications, designed with large, complex distributed workloads in mind";
    homepage = "https://github.com/pytorch/torchsnapshot/";
    changelog = "https://github.com/pytorch/torchsnapshot/releases/tag/${version}";
    license = lib.licenses.bsd3;
    maintainers = [ lib.maintainers.GaetanLepage ];
    # Darwin is marked bad because the test suite gets stuck there and
    # eventually times out with:
    #   "torch.distributed.DistNetworkError: The client socket has timed out after"
    badPlatforms = [ lib.systems.inspect.patterns.isDarwin ];
  };
}