# Nix derivation for the `webdataset` Python package.
#
# The source is fetched from the upstream GitHub repository at the tag named
# after `version`, built as a pyproject package with setuptools and wheel,
# and checked with pytest via `pytestCheckHook`.
{
  lib,
  stdenv,
  buildPythonPackage,
  braceexpand,
  imageio,
  lmdb,
  msgpack,
  numpy,
  pytestCheckHook,
  pyyaml,
  setuptools,
  torch,
  torchvision,
  wheel,
  fetchFromGitHub,
}:
buildPythonPackage rec {
  pname = "webdataset";
  version = "0.2.90";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "webdataset";
    repo = "webdataset";
    rev = "refs/tags/${version}";
    hash = "sha256-selj7XD7NS831lbPnx/4o46bNpsxuFdSEIIb4S2b7S0=";
  };

  # Build-time Python tooling.
  nativeBuildInputs = [
    setuptools
    wheel
  ];

  # Runtime dependencies propagated to consumers of the package.
  propagatedBuildInputs = [
    braceexpand
    numpy
    pyyaml
  ];

  # Only needed while running the test suite.
  nativeCheckInputs = [
    pytestCheckHook
    imageio
    torch
    torchvision
    msgpack
    lmdb
  ];

  pythonImportsCheck = [ "webdataset" ];

  # Platform conditions use `stdenv.hostPlatform.*` rather than the legacy
  # `stdenv.is*` aliases; the selected tests are the same.
  disabledTests =
    [
      # requires network
      "test_batched"
      "test_cache_dir"
      "test_concurrent_download_and_open"
      "test_dataloader"
      "test_decode_handlers"
      "test_decoder"
      "test_download"
      "test_handlers"
      "test_pipe"
      "test_remote_file"
      "test_shard_syntax"
      "test_torchvision"
      "test_unbatched"
      "test_yaml3"
    ]
    ++ lib.optionals stdenv.hostPlatform.isDarwin [
      # pickling error
      "test_background_download"
    ]
    ++ lib.optionals (stdenv.hostPlatform.isx86_64 && stdenv.hostPlatform.isDarwin) [
      "test_concurrent_access"
      # fails to patch 'init_process_group' from torch.distributed
      "TestDistributedChunkedSampler"
    ]
    ++ lib.optionals (stdenv.hostPlatform.isAarch64 && stdenv.hostPlatform.isLinux) [
      # segfaults on aarch64-linux
      "test_webloader"
      "test_webloader2"
      "test_webloader_repeat"
      "test_webloader_unbatched"
    ];

  # `lib` is referenced explicitly instead of `with lib;` so that no other
  # names from `lib` are implicitly brought into scope.
  meta = {
    description = "High-performance Python-based I/O system for large (and small) deep learning problems, with strong support for PyTorch";
    mainProgram = "widsindex";
    homepage = "https://github.com/webdataset/webdataset";
    changelog = "https://github.com/webdataset/webdataset/releases/tag/${version}";
    license = lib.licenses.bsd3;
    maintainers = [ lib.maintainers.iynaix ];
  };
}