{
  lib,
  stdenv,
  fetchFromGitHub,
  buildPythonPackage,
  substituteAll,

  # build-system
  setuptools,

  # substituted into ffmpeg-path.patch
  ffmpeg-headless,

  # propagated dependencies
  more-itertools,
  numba,
  numpy,
  tiktoken,
  torch,
  tqdm,
  # only propagated where the host platform supports it (see below)
  triton,

  # tests
  pytestCheckHook,
  scipy,
}:

# Python package for OpenAI's Whisper, built from the tagged GitHub release.
buildPythonPackage rec {
  pname = "whisper";
  version = "20231117";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "openai";
    repo = "whisper";
    rev = "refs/tags/v${version}";
    hash = "sha256-MJ1XjB/GuYUiECCuuHS0NWHvvs+ko0oTvLuDI7zLNiY=";
  };

  patches = [
    # The patch is a template: `ffmpeg` is filled in with the ffmpeg-headless
    # package before it is applied.
    # NOTE(review): presumably this pins the ffmpeg executable path used at
    # runtime; confirm against ffmpeg-path.patch.
    (substituteAll {
      src = ./ffmpeg-path.patch;
      ffmpeg = ffmpeg-headless;
    })
  ];

  nativeBuildInputs = [ setuptools ];

  propagatedBuildInputs =
    [
      more-itertools
      numba
      numpy
      tiktoken
      torch
      tqdm
    ]
    # triton is appended only when it is available on the host platform.
    ++ lib.optional (lib.meta.availableOn stdenv.hostPlatform triton) triton;

  nativeCheckInputs = [
    pytestCheckHook
    scipy
  ];

  # Run the test suite with HOME pointing at the build's temporary directory.
  preCheck = ''
    export HOME=$TMPDIR
  '';

  disabledTests = [
    # requires network access to download models
    "test_transcribe"
    # requires NVIDIA drivers
    "test_dtw_cuda_equivalence"
    "test_median_filter_equivalence"
  ];

  meta = {
    description = "General-purpose speech recognition model";
    homepage = "https://github.com/openai/whisper";
    changelog = "https://github.com/openai/whisper/blob/v${version}/CHANGELOG.md";
    license = lib.licenses.mit;
    mainProgram = "whisper";
    maintainers = with lib.maintainers; [
      hexa
      MayNiklas
    ];
  };
}