this repo has no description
at push-notifications 94 lines 2.7 kB view raw
1use axum::extract::Query; 2use axum::http::header; 3use axum::response::{IntoResponse, Response}; 4use msedge_tts::tts::client::connect_async; 5use msedge_tts::tts::SpeechConfig; 6use sha2::{Digest, Sha256}; 7use std::path::PathBuf; 8 9use crate::errors::AppError; 10 11const DEFAULT_VOICE: &str = "fil-PH-BlessicaNeural"; 12const ALLOWED_VOICES: &[&str] = &["fil-PH-BlessicaNeural", "fil-PH-AngeloNeural"]; 13const MAX_TEXT_LEN: usize = 500; 14const AUDIO_FORMAT: &str = "audio-24khz-48kbitrate-mono-mp3"; 15 16#[derive(serde::Deserialize)] 17pub struct TtsQuery { 18 text: String, 19 voice: Option<String>, 20} 21 22fn cache_dir() -> PathBuf { 23 let dir = PathBuf::from(std::env::var("DATA_DIR").unwrap_or_else(|_| "data".to_string())) 24 .join("tts-cache"); 25 let _ = std::fs::create_dir_all(&dir); 26 dir 27} 28 29fn cache_key(text: &str, voice: &str) -> String { 30 let mut hasher = Sha256::new(); 31 hasher.update(text.as_bytes()); 32 hasher.update(b"|"); 33 hasher.update(voice.as_bytes()); 34 hex::encode(hasher.finalize()) 35} 36 37pub async fn synthesize(Query(params): Query<TtsQuery>) -> Result<Response, AppError> { 38 let text = params.text.trim().to_string(); 39 if text.is_empty() { 40 return Err(AppError::BadRequest("text is required".to_string())); 41 } 42 if text.len() > MAX_TEXT_LEN { 43 return Err(AppError::BadRequest(format!( 44 "text exceeds {MAX_TEXT_LEN} characters" 45 ))); 46 } 47 48 let voice = params.voice.unwrap_or_else(|| DEFAULT_VOICE.to_string()); 49 if !ALLOWED_VOICES.contains(&voice.as_str()) { 50 return Err(AppError::BadRequest(format!("unsupported voice: {voice}"))); 51 } 52 53 // Check disk cache 54 let key = cache_key(&text, &voice); 55 let cache_path = cache_dir().join(format!("{key}.mp3")); 56 57 if let Ok(bytes) = tokio::fs::read(&cache_path).await { 58 return Ok(audio_response(bytes)); 59 } 60 61 // Synthesize via Edge TTS 62 let mut tts = connect_async() 63 .await 64 .map_err(|e| AppError::Internal(format!("TTS connect failed: {e}")))?; 65 66 let config = SpeechConfig { 67 voice_name: voice, 68 audio_format: AUDIO_FORMAT.to_string(), 69 pitch: 0, 70 rate: 0, 71 volume: 0, 72 }; 73 74 let audio = tts 75 .synthesize(&text, &config) 76 .await 77 .map_err(|e| AppError::Internal(format!("TTS synthesis failed: {e}")))?; 78 79 // Write to cache (best-effort) 80 let _ = tokio::fs::write(&cache_path, &audio.audio_bytes).await; 81 82 Ok(audio_response(audio.audio_bytes)) 83} 84 85fn audio_response(bytes: Vec<u8>) -> Response { 86 ( 87 [ 88 (header::CONTENT_TYPE, "audio/mpeg"), 89 (header::CACHE_CONTROL, "public, max-age=86400"), 90 ], 91 bytes, 92 ) 93 .into_response() 94}