//! Telemetry infrastructure for weaver services. //! //! Provides: //! - Prometheus metrics with `/metrics` endpoint //! - Tracing with pretty console output + optional Loki push //! //! # Usage //! //! ```ignore //! use weaver_common::telemetry::{self, TelemetryConfig}; //! //! #[tokio::main] //! async fn main() { //! // Initialize telemetry (metrics + tracing) //! let config = TelemetryConfig::from_env("weaver-index"); //! telemetry::init(config).await; //! //! // Mount the metrics endpoint in your axum router //! let app = Router::new() //! .route("/metrics", get(|| async { telemetry::render() })); //! //! // Use metrics //! metrics::counter!("requests_total").increment(1); //! //! // Use tracing (goes to both console and loki if configured) //! tracing::info!("server started"); //! } //! ``` use metrics_exporter_prometheus::{PrometheusBuilder, PrometheusHandle}; use std::sync::OnceLock; use tracing::Level; use tracing_subscriber::layer::SubscriberExt; use tracing_subscriber::util::SubscriberInitExt; use tracing_subscriber::{EnvFilter, Layer}; static PROMETHEUS_HANDLE: OnceLock = OnceLock::new(); /// Telemetry configuration #[derive(Debug, Clone)] pub struct TelemetryConfig { /// Service name for labeling (e.g., "weaver-index", "weaver-app") pub service_name: String, /// Loki push URL (e.g., "http://localhost:3100"). None disables Loki. pub loki_url: Option, /// Console log level (default: INFO, DEBUG in debug builds) pub console_level: Level, } impl TelemetryConfig { /// Load config from environment variables. /// /// - `LOKI_URL`: Loki push endpoint (optional) /// - `RUST_LOG`: Standard env filter (optional, overrides console_level) pub fn from_env(service_name: impl Into) -> Self { let console_level = if cfg!(debug_assertions) { Level::DEBUG } else { Level::INFO }; Self { service_name: service_name.into(), loki_url: std::env::var("LOKI_URL").ok(), console_level, } } } /// Opaque handle for the Loki background task. 
pub struct LokiTask(tracing_loki::BackgroundTask);

/// Initialize telemetry (metrics + tracing).
///
/// Call once at application startup. If `config.loki_url` holds a valid URL,
/// spawns a background task to push logs to Loki.
///
/// Because the Loki task is spawned with `tokio::spawn`, this must run inside
/// a Tokio runtime. Use [`init_sync`] plus [`spawn_loki_task`] when the
/// runtime is not available yet.
///
/// # Panics
///
/// Panics if the Prometheus recorder cannot be installed (see
/// [`init_metrics`]) or if the Loki layer cannot be built.
pub async fn init(config: TelemetryConfig) {
    // Metrics first, so the recorder exists before anything emits.
    init_metrics();

    // Tracing second; a Loki task is only returned when Loki is configured.
    if let Some(task) = init_tracing(config) {
        spawn_loki_task(task);
    }
}

/// Initialize telemetry without spawning the Loki task.
///
/// Use this when you need to set up tracing before a tokio runtime is available.
/// Returns the Loki task if configured - caller must spawn it later with
/// [`spawn_loki_task`], otherwise nothing drives the task that pushes logs to Loki.
pub fn init_sync(config: TelemetryConfig) -> Option<LokiTask> {
    init_metrics();
    init_tracing(config)
}

/// Spawn the Loki background task.
///
/// Call this inside a tokio runtime after [`init_sync`]. The task is detached:
/// its join handle is dropped.
pub fn spawn_loki_task(task: LokiTask) {
    tokio::spawn(task.0);
}

/// Initialize just the prometheus metrics recorder.
///
/// The first call installs the recorder; later calls return the same handle
/// without reinstalling anything.
///
/// # Panics
///
/// Panics on the first call if the histogram buckets are rejected or the
/// recorder cannot be installed.
pub fn init_metrics() -> &'static PrometheusHandle {
    PROMETHEUS_HANDLE.get_or_init(|| {
        // HTTP request duration buckets (in seconds)
        let http_buckets = [
            0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0,
        ];

        PrometheusBuilder::new()
            .set_buckets_for_metric(
                metrics_exporter_prometheus::Matcher::Prefix("http_request_duration".to_string()),
                &http_buckets,
            )
            .expect("failed to set histogram buckets")
            .install_recorder()
            .expect("failed to install prometheus recorder")
    })
}

/// Initialize tracing with console + optional Loki layers.
///
/// Returns the Loki background task if Loki is configured.
fn init_tracing(config: TelemetryConfig) -> Option { let env_filter = EnvFilter::try_from_default_env().unwrap_or_else(|_| { EnvFilter::new(format!("{}", config.console_level.as_str().to_lowercase())) }); // Pretty console layer for human-readable stdout let console_layer = tracing_subscriber::fmt::layer() .with_target(true) .with_thread_ids(false) .with_file(false) .with_line_number(false) .compact() .with_filter(env_filter); // Optional Loki layer for structured logs if let Some(loki_url) = config.loki_url { match tracing_loki::url::Url::parse(&loki_url) { Ok(url) => { let (loki_layer, loki_task) = tracing_loki::builder() .label("service", config.service_name.clone()) .expect("invalid label") .build_url(url) .expect("failed to build loki layer"); tracing_subscriber::registry() .with(console_layer) .with(loki_layer) .init(); tracing::info!( service = %config.service_name, loki_url = %loki_url, "telemetry initialized with loki" ); Some(LokiTask(loki_task)) } Err(e) => { // Invalid URL - fall back to console only tracing_subscriber::registry().with(console_layer).init(); tracing::warn!( error = %e, loki_url = %loki_url, "invalid LOKI_URL, falling back to console only" ); None } } } else { // No Loki URL - console only tracing_subscriber::registry().with(console_layer).init(); tracing::debug!( service = %config.service_name, "telemetry initialized (console only, set LOKI_URL to enable loki)" ); None } } /// Get the prometheus handle. pub fn handle() -> &'static PrometheusHandle { PROMETHEUS_HANDLE.get_or_init(|| { PrometheusBuilder::new() .install_recorder() .expect("failed to install prometheus recorder") }) } /// Render metrics in prometheus text format. pub fn render() -> String { handle().render() } // Re-export the metrics crate for convenience pub use metrics::{counter, gauge, histogram}; /// HTTP metrics middleware for axum. /// /// Records `http_requests_total` counter and `http_request_duration_seconds` histogram. /// Use with `axum::middleware::from_fn`. 
///
/// # Example
/// ```ignore
/// use axum::middleware;
/// use weaver_common::telemetry::http_metrics;
///
/// let app = Router::new()
///     .route("/", get(handler))
///     .layer(middleware::from_fn(http_metrics));
/// ```
///
/// # Labels
///
/// The counter carries `method`, `path` and `status`; the histogram carries
/// `method` and `path`. `path` is the request path reduced to its first three
/// segments. The duration is recorded in seconds and covers the time spent in
/// `next.run`.
#[cfg(feature = "telemetry")]
pub async fn http_metrics(
    req: axum::extract::Request,
    next: axum::middleware::Next,
) -> axum::response::Response {
    let start = std::time::Instant::now();

    // Capture the labels before `req` is moved into the rest of the stack.
    // The path is normalized once here and shared by both metrics.
    let method = req.method().to_string();
    let path = normalize_path(req.uri().path());

    let response = next.run(req).await;

    let duration = start.elapsed().as_secs_f64();
    let status = response.status().as_u16().to_string();

    metrics::counter!(
        "http_requests_total",
        "method" => method.clone(),
        "path" => path.clone(),
        "status" => status
    )
    .increment(1);

    metrics::histogram!(
        "http_request_duration_seconds",
        "method" => method,
        "path" => path
    )
    .record(duration);

    response
}

/// Normalize path for metrics labels.
///
/// Keeps the first 3 non-empty segments and collapses anything beyond them
/// into a trailing `/*` to reduce cardinality. Empty segments (leading,
/// trailing or doubled slashes) are ignored, and a path with no segments
/// becomes `/`.
///
/// NOTE(review): the kept segments are raw request text, so identifiers in the
/// first three segments still produce one label value each - confirm this is
/// acceptable for the routes served.
#[cfg(feature = "telemetry")]
fn normalize_path(path: &str) -> String {
    const KEPT_SEGMENTS: usize = 3;

    let mut segments = path.split('/').filter(|s| !s.is_empty());

    // Only the kept segments are materialized; the rest of the path is never
    // collected, so the work stays bounded for very long paths.
    let mut normalized = String::new();
    for segment in segments.by_ref().take(KEPT_SEGMENTS) {
        normalized.push('/');
        normalized.push_str(segment);
    }

    if normalized.is_empty() {
        return "/".to_string();
    }

    // One more non-empty segment means the path was truncated.
    if segments.next().is_some() {
        normalized.push_str("/*");
    }

    normalized
}