api/src/routes/tts.rs at push-notifications

pierrelf.com / ayos
fork atom
this repo has no description
fork atom
ayos / api / src / routes / tts.rs
at push-notifications 94 lines 2.7 kB view raw
wrap content
pierrelf.com Replace browser TTS with Edge TTS server-side endpoint 5w ago
5b432ebf
 1use axum::extract::Query;
 2use axum::http::header;
 3use axum::response::{IntoResponse, Response};
 4use msedge_tts::tts::client::connect_async;
 5use msedge_tts::tts::SpeechConfig;
 6use sha2::{Digest, Sha256};
 7use std::path::PathBuf;
 8
 9use crate::errors::AppError;
10
/// Voice used when the request does not name one.
const DEFAULT_VOICE: &str = "fil-PH-BlessicaNeural";
/// Voices a request is allowed to select; any other value is rejected by the handler.
const ALLOWED_VOICES: &[&str] = &[DEFAULT_VOICE, "fil-PH-AngeloNeural"];
/// Maximum accepted length of the trimmed `text` query parameter.
const MAX_TEXT_LEN: usize = 500;
/// Audio format identifier placed in the speech configuration sent to the TTS client.
const AUDIO_FORMAT: &str = "audio-24khz-48kbitrate-mono-mp3";
15
16#[derive(serde::Deserialize)]
17pub struct TtsQuery {
18    text: String,
19    voice: Option<String>,
20}
21
/// Returns the directory used for the on-disk audio cache.
///
/// The base directory comes from the `DATA_DIR` environment variable and falls back to the
/// relative path `data` when the variable cannot be read. `tts-cache` is appended to it.
/// Directory creation is best-effort: a failure is ignored and the path is returned anyway.
fn cache_dir() -> PathBuf {
    let base = match std::env::var("DATA_DIR") {
        Ok(value) => value,
        Err(_) => String::from("data"),
    };

    let mut path = PathBuf::from(base);
    path.push("tts-cache");

    // Ignored on purpose: callers treat the cache as optional.
    let _ = std::fs::create_dir_all(&path);
    path
}
28
29fn cache_key(text: &str, voice: &str) -> String {
30    let mut hasher = Sha256::new();
31    hasher.update(text.as_bytes());
32    hasher.update(b"|");
33    hasher.update(voice.as_bytes());
34    hex::encode(hasher.finalize())
35}
36
37pub async fn synthesize(Query(params): Query<TtsQuery>) -> Result<Response, AppError> {
38    let text = params.text.trim().to_string();
39    if text.is_empty() {
40        return Err(AppError::BadRequest("text is required".to_string()));
41    }
42    if text.len() > MAX_TEXT_LEN {
43        return Err(AppError::BadRequest(format!(
44            "text exceeds {MAX_TEXT_LEN} characters"
45        )));
46    }
47
48    let voice = params.voice.unwrap_or_else(|| DEFAULT_VOICE.to_string());
49    if !ALLOWED_VOICES.contains(&voice.as_str()) {
50        return Err(AppError::BadRequest(format!("unsupported voice: {voice}")));
51    }
52
53    // Check disk cache
54    let key = cache_key(&text, &voice);
55    let cache_path = cache_dir().join(format!("{key}.mp3"));
56
57    if let Ok(bytes) = tokio::fs::read(&cache_path).await {
58        return Ok(audio_response(bytes));
59    }
60
61    // Synthesize via Edge TTS
62    let mut tts = connect_async()
63        .await
64        .map_err(|e| AppError::Internal(format!("TTS connect failed: {e}")))?;
65
66    let config = SpeechConfig {
67        voice_name: voice,
68        audio_format: AUDIO_FORMAT.to_string(),
69        pitch: 0,
70        rate: 0,
71        volume: 0,
72    };
73
74    let audio = tts
75        .synthesize(&text, &config)
76        .await
77        .map_err(|e| AppError::Internal(format!("TTS synthesis failed: {e}")))?;
78
79    // Write to cache (best-effort)
80    let _ = tokio::fs::write(&cache_path, &audio.audio_bytes).await;
81
82    Ok(audio_response(audio.audio_bytes))
83}
84
85fn audio_response(bytes: Vec<u8>) -> Response {
86    (
87        [
88            (header::CONTENT_TYPE, "audio/mpeg"),
89            (header::CACHE_CONTROL, "public, max-age=86400"),
90        ],
91        bytes,
92    )
93        .into_response()
94}