wyoming-faster-whisper: 2.4.0 -> 2.5.0 (#417413)

authored by

Martin Weinelt and committed by
GitHub
0afcbf81 a53e6ff1

+246 -203
+235 -201
nixos/modules/services/home-automation/wyoming/faster-whisper.nix
··· 10 10 cfg = config.services.wyoming.faster-whisper; 11 11 12 12 inherit (lib) 13 + mapAttrsToList 13 14 mkOption 14 15 mkEnableOption 15 16 mkPackageOption 17 + optionals 16 18 types 17 19 ; 18 20 ··· 24 26 escapeSystemdExecArgs 25 27 ; 26 28 29 + finalPackage = cfg.package.overridePythonAttrs (oldAttrs: { 30 + dependencies = 31 + oldAttrs.dependencies 32 + # for transformer model support; the package is shared by all servers, so add it when any server sets useTransformers 33 + ++ optionals (lib.any (server: server.useTransformers) (lib.attrValues cfg.servers)) oldAttrs.optional-dependencies.transformers; 34 + }); 35 + 27 36 in 28 37 29 38 { ··· 35 44 description = '' 36 45 Attribute set of wyoming-faster-whisper instances to spawn. 37 46 ''; 38 - type = types.attrsOf ( 39 - types.submodule ( 40 - { ... }: 41 - { 42 - options = { 43 - enable = mkEnableOption "Wyoming faster-whisper server"; 47 + type = attrsOf (submodule { 48 + options = { 49 + enable = mkEnableOption "Wyoming faster-whisper server"; 44 50 45 - model = mkOption { 46 - type = str; 47 - default = "tiny-int8"; 48 - example = "Systran/faster-distil-whisper-small.en"; 49 - description = '' 50 - Name of the voice model to use. Can also be a HuggingFace model ID or a path to 51 - a custom model directory. 51 + model = mkOption { 52 + type = str; 53 + default = "tiny-int8"; 54 + example = "Systran/faster-distil-whisper-small.en"; 55 + # https://github.com/home-assistant/addons/blob/master/whisper/DOCS.md#option-model 56 + description = '' 57 + Name of the voice model to use. Can also be a HuggingFace model ID or a path to 58 + a custom model directory. 52 59 53 - Compressed models (`int8`) are slightly less accurate, but smaller and faster. 60 + With {option}`useTransformers` enabled, a HuggingFace transformers Whisper model 61 + ID from HuggingFace like `openai/whisper-tiny.en` must be used. 
54 62 55 - Available models: 56 - - `tiny-int8` (compressed) 57 - - `tiny` 58 - - `tiny.en` (English only) 59 - - `base-int8` (compressed) 60 - - `base` 61 - - `base.en` (English only) 62 - - `small-int8` (compressed) 63 - - `distil-small.en` (distilled, English only) 64 - - `small` 65 - - `small.en` (English only) 66 - - `medium-int8` (compressed) 67 - - `distil-medium.en` (distilled, English only) 68 - - `medium` 69 - - `medium.en` (English only) 70 - - `large` 71 - - `large-v1` 72 - - `distil-large-v2` (distilled, English only) 73 - - `large-v2` 74 - - `distil-large-v3` (distilled, English only) 75 - - `large-v3` 76 - - `turbo` (faster than large-v3) 77 - ''; 78 - }; 63 + Compressed models (`int8`) are slightly less accurate, but smaller and faster. 64 + Distilled models are uncompressed and faster and smaller than non-distilled models. 79 65 80 - uri = mkOption { 81 - type = strMatching "^(tcp|unix)://.*$"; 82 - example = "tcp://0.0.0.0:10300"; 83 - description = '' 84 - URI to bind the wyoming server to. 85 - ''; 86 - }; 66 + Available models: 67 + - `tiny-int8` (compressed) 68 + - `tiny` 69 + - `tiny.en` (English only) 70 + - `base-int8` (compressed) 71 + - `base` 72 + - `base.en` (English only) 73 + - `small-int8` (compressed) 74 + - `distil-small.en` (distilled, English only) 75 + - `small` 76 + - `small.en` (English only) 77 + - `medium-int8` (compressed) 78 + - `distil-medium.en` (distilled, English only) 79 + - `medium` 80 + - `medium.en` (English only) 81 + - `large` 82 + - `large-v1` 83 + - `distil-large-v2` (distilled, English only) 84 + - `large-v2` 85 + - `distil-large-v3` (distilled, English only) 86 + - `large-v3` 87 + - `turbo` (faster than large-v3) 88 + ''; 89 + }; 87 90 88 - device = mkOption { 89 - # https://opennmt.net/CTranslate2/python/ctranslate2.models.Whisper.html# 90 - type = types.enum [ 91 - "cpu" 92 - "cuda" 93 - "auto" 94 - ]; 95 - default = "cpu"; 96 - description = '' 97 - Determines the platform faster-whisper is run on. 
CPU works everywhere, CUDA requires a compatible NVIDIA GPU. 98 - ''; 99 - }; 91 + useTransformers = mkOption { 92 + type = bool; 93 + default = false; 94 + description = '' 95 + Whether to provide the dependencies to allow using transformer models. 96 + ''; 97 + }; 100 98 101 - language = mkOption { 102 - type = enum [ 103 - # https://github.com/home-assistant/addons/blob/master/whisper/config.yaml#L20 104 - "auto" 105 - "af" 106 - "am" 107 - "ar" 108 - "as" 109 - "az" 110 - "ba" 111 - "be" 112 - "bg" 113 - "bn" 114 - "bo" 115 - "br" 116 - "bs" 117 - "ca" 118 - "cs" 119 - "cy" 120 - "da" 121 - "de" 122 - "el" 123 - "en" 124 - "es" 125 - "et" 126 - "eu" 127 - "fa" 128 - "fi" 129 - "fo" 130 - "fr" 131 - "gl" 132 - "gu" 133 - "ha" 134 - "haw" 135 - "he" 136 - "hi" 137 - "hr" 138 - "ht" 139 - "hu" 140 - "hy" 141 - "id" 142 - "is" 143 - "it" 144 - "ja" 145 - "jw" 146 - "ka" 147 - "kk" 148 - "km" 149 - "kn" 150 - "ko" 151 - "la" 152 - "lb" 153 - "ln" 154 - "lo" 155 - "lt" 156 - "lv" 157 - "mg" 158 - "mi" 159 - "mk" 160 - "ml" 161 - "mn" 162 - "mr" 163 - "ms" 164 - "mt" 165 - "my" 166 - "ne" 167 - "nl" 168 - "nn" 169 - "no" 170 - "oc" 171 - "pa" 172 - "pl" 173 - "ps" 174 - "pt" 175 - "ro" 176 - "ru" 177 - "sa" 178 - "sd" 179 - "si" 180 - "sk" 181 - "sl" 182 - "sn" 183 - "so" 184 - "sq" 185 - "sr" 186 - "su" 187 - "sv" 188 - "sw" 189 - "ta" 190 - "te" 191 - "tg" 192 - "th" 193 - "tk" 194 - "tl" 195 - "tr" 196 - "tt" 197 - "uk" 198 - "ur" 199 - "uz" 200 - "vi" 201 - "yi" 202 - "yue" 203 - "yo" 204 - "zh" 205 - ]; 206 - example = "en"; 207 - description = '' 208 - The language used to to parse words and sentences. 209 - ''; 210 - }; 99 + uri = mkOption { 100 + type = strMatching "^(tcp|unix)://.*$"; 101 + example = "tcp://0.0.0.0:10300"; 102 + description = '' 103 + URI to bind the wyoming server to. 
104 + ''; 105 + }; 211 106 212 - initialPrompt = mkOption { 213 - type = nullOr str; 214 - default = null; 215 - example = '' 216 - The following conversation takes place in the universe of Wizard of Oz. Key terms include 'Yellow Brick Road' (the path to follow), 'Emerald City' (the ultimate goal), and 'Ruby Slippers' (the magical tools to succeed). Keep these in mind as they guide the journey. 217 - ''; 218 - description = '' 219 - Optional text to provide as a prompt for the first window. This can be used to provide, or 220 - "prompt-engineer" a context for transcription, e.g. custom vocabularies or proper nouns 221 - to make it more likely to predict those word correctly. 222 - ''; 223 - }; 107 + device = mkOption { 108 + # https://opennmt.net/CTranslate2/python/ctranslate2.models.Whisper.html# 109 + type = enum [ 110 + "cpu" 111 + "cuda" 112 + "auto" 113 + ]; 114 + default = "cpu"; 115 + description = '' 116 + Determines the platform faster-whisper is run on. CPU works everywhere, CUDA requires a compatible NVIDIA GPU. 117 + ''; 118 + }; 224 119 225 - beamSize = mkOption { 226 - type = ints.unsigned; 227 - default = 0; 228 - example = 5; 229 - description = '' 230 - The number of beams to use in beam search. 231 - Use `0` to automatically select a value based on the CPU. 
232 - ''; 233 - apply = toString; 234 - }; 120 + language = mkOption { 121 + type = enum [ 122 + # https://github.com/home-assistant/addons/blob/master/whisper/config.yaml#L20 123 + "auto" 124 + "af" 125 + "am" 126 + "ar" 127 + "as" 128 + "az" 129 + "ba" 130 + "be" 131 + "bg" 132 + "bn" 133 + "bo" 134 + "br" 135 + "bs" 136 + "ca" 137 + "cs" 138 + "cy" 139 + "da" 140 + "de" 141 + "el" 142 + "en" 143 + "es" 144 + "et" 145 + "eu" 146 + "fa" 147 + "fi" 148 + "fo" 149 + "fr" 150 + "gl" 151 + "gu" 152 + "ha" 153 + "haw" 154 + "he" 155 + "hi" 156 + "hr" 157 + "ht" 158 + "hu" 159 + "hy" 160 + "id" 161 + "is" 162 + "it" 163 + "ja" 164 + "jw" 165 + "ka" 166 + "kk" 167 + "km" 168 + "kn" 169 + "ko" 170 + "la" 171 + "lb" 172 + "ln" 173 + "lo" 174 + "lt" 175 + "lv" 176 + "mg" 177 + "mi" 178 + "mk" 179 + "ml" 180 + "mn" 181 + "mr" 182 + "ms" 183 + "mt" 184 + "my" 185 + "ne" 186 + "nl" 187 + "nn" 188 + "no" 189 + "oc" 190 + "pa" 191 + "pl" 192 + "ps" 193 + "pt" 194 + "ro" 195 + "ru" 196 + "sa" 197 + "sd" 198 + "si" 199 + "sk" 200 + "sl" 201 + "sn" 202 + "so" 203 + "sq" 204 + "sr" 205 + "su" 206 + "sv" 207 + "sw" 208 + "ta" 209 + "te" 210 + "tg" 211 + "th" 212 + "tk" 213 + "tl" 214 + "tr" 215 + "tt" 216 + "uk" 217 + "ur" 218 + "uz" 219 + "vi" 220 + "yi" 221 + "yue" 222 + "yo" 223 + "zh" 224 + ]; 225 + example = "en"; 226 + description = '' 227 + The language used to to parse words and sentences. 228 + ''; 229 + }; 235 230 236 - extraArgs = mkOption { 237 - type = listOf str; 238 - default = [ ]; 239 - description = '' 240 - Extra arguments to pass to the server commandline. 241 - ''; 242 - }; 243 - }; 244 - } 245 - ) 246 - ); 231 + initialPrompt = mkOption { 232 + type = nullOr str; 233 + default = null; 234 + # https://github.com/home-assistant/addons/blob/master/whisper/DOCS.md#option-custom_model_type 235 + example = '' 236 + The following conversation takes place in the universe of 237 + Wizard of Oz. 
Key terms include 'Yellow Brick Road' (the path 238 + to follow), 'Emerald City' (the ultimate goal), and 'Ruby 239 + Slippers' (the magical tools to succeed). Keep these in mind as 240 + they guide the journey. 241 + ''; 242 + description = '' 243 + Optional text to provide as a prompt for the first window. This can be used to provide, or 244 + "prompt-engineer" a context for transcription, e.g. custom vocabularies or proper nouns 245 + to make it more likely to predict those words correctly. 246 + 247 + Not supported when {option}`useTransformers` is enabled. 248 + ''; 249 + }; 250 + 251 + beamSize = mkOption { 252 + type = ints.unsigned; 253 + default = 0; 254 + example = 5; 255 + description = '' 256 + The number of beams to use in beam search. 257 + Use `0` to automatically select a value based on the CPU. 258 + ''; 259 + apply = toString; 260 + }; 261 + 262 + extraArgs = mkOption { 263 + type = listOf str; 264 + default = [ ]; 265 + description = '' 266 + Extra arguments to pass to the server commandline. 
267 + ''; 268 + }; 269 + }; 270 + }); 247 271 }; 248 272 }; 249 273 ··· 256 280 ; 257 281 in 258 282 mkIf (cfg.servers != { }) { 283 + assertions = mapAttrsToList ( 284 + server: options: { 285 + assertion = options.useTransformers -> options.initialPrompt == null; 286 + message = "wyoming-faster-whisper/${server}: Transformer models (`useTransformers`) do not currently support an `initialPrompt`."; 287 + } 288 + ) cfg.servers; # one assertion per configured server instance 289 + 259 290 systemd.services = mapAttrs' ( 260 291 server: options: 261 292 nameValuePair "wyoming-faster-whisper-${server}" { ··· 279 310 # https://github.com/home-assistant/addons/blob/master/whisper/rootfs/etc/s6-overlay/s6-rc.d/whisper/run 280 311 ExecStart = escapeSystemdExecArgs ( 281 312 [ 282 - (lib.getExe cfg.package) 313 + (lib.getExe finalPackage) 283 314 "--data-dir" 284 315 "/var/lib/wyoming/faster-whisper" 285 316 "--uri" ··· 292 323 options.language 293 324 "--beam-size" 294 325 options.beamSize 326 + ] 327 + ++ lib.optionals options.useTransformers [ 328 + "--use-transformers" 295 329 ] 296 330 ++ lib.optionals (options.initialPrompt != null) [ 297 331 "--initial-prompt"
+11 -2
pkgs/by-name/wy/wyoming-faster-whisper/package.nix
··· 6 6 7 7 python3Packages.buildPythonApplication rec { 8 8 pname = "wyoming-faster-whisper"; 9 - version = "2.4.0"; 9 + version = "2.5.0"; 10 10 pyproject = true; 11 11 12 12 src = fetchFromGitHub { 13 13 owner = "rhasspy"; 14 14 repo = "wyoming-faster-whisper"; 15 15 rev = "refs/tags/v${version}"; 16 - hash = "sha256-Ai28i+2/oWI2Y61x7U5an5MBHfuBaGy6qZZwZydS308="; 16 + hash = "sha256-MKB6gZdGdAYoNK8SRiDHG8xtMZ5mXdaSn+bH4T6o/K4="; 17 17 }; 18 18 19 19 build-system = with python3Packages; [ ··· 29 29 faster-whisper 30 30 wyoming 31 31 ]; 32 + 33 + optional-dependencies = { 34 + transformers = 35 + with python3Packages; 36 + [ 37 + transformers 38 + ] 39 + ++ transformers.optional-dependencies.torch; 40 + }; 32 41 33 42 pythonImportsCheck = [ 34 43 "wyoming_faster_whisper"