observability stuff

Orual 4705bfcc 4453cdd3

+2378 -71
+2
Cargo.lock
··· 12049 "tiny-skia", 12050 "tokio", 12051 "tower", 12052 "tracing", 12053 "tracing-subscriber", 12054 "tracing-wasm", ··· 12092 name = "weaver-common" 12093 version = "0.1.0" 12094 dependencies = [ 12095 "blake3", 12096 "chrono", 12097 "futures-util",
··· 12049 "tiny-skia", 12050 "tokio", 12051 "tower", 12052 + "tower-http", 12053 "tracing", 12054 "tracing-subscriber", 12055 "tracing-wasm", ··· 12093 name = "weaver-common" 12094 version = "0.1.0" 12095 dependencies = [ 12096 + "axum", 12097 "blake3", 12098 "chrono", 12099 "futures-util",
+2 -1
crates/weaver-app/Cargo.toml
··· 39 web = ["dioxus/web", "dioxus-primitives/web"] 40 desktop = ["dioxus/desktop"] 41 mobile = ["dioxus/mobile"] 42 - server = [ "dioxus/server", "dep:jacquard-axum", "dep:axum", "dep:axum-extra", "dep:tower", "dep:resvg", "dep:usvg", "dep:tiny-skia", "dep:textwrap", "dep:askama", "dep:fontdb", "dep:lightningcss"] 43 collab-worker = ["weaver-common/iroh"] 44 45 ··· 63 axum = { version = "0.8.6", optional = true } 64 axum-extra = { version = "0.10", optional = true, features = ["typed-header"] } 65 tower = { version = "0.5", optional = true } 66 mime-sniffer = {version = "^0.1"} 67 chrono = { version = "0.4" } 68 serde = { version = "1.0" }
··· 39 web = ["dioxus/web", "dioxus-primitives/web"] 40 desktop = ["dioxus/desktop"] 41 mobile = ["dioxus/mobile"] 42 + server = [ "dioxus/server", "dep:jacquard-axum", "dep:axum", "dep:axum-extra", "dep:tower", "dep:tower-http", "dep:resvg", "dep:usvg", "dep:tiny-skia", "dep:textwrap", "dep:askama", "dep:fontdb", "dep:lightningcss", "weaver-common/telemetry"] 43 collab-worker = ["weaver-common/iroh"] 44 45 ··· 63 axum = { version = "0.8.6", optional = true } 64 axum-extra = { version = "0.10", optional = true, features = ["typed-header"] } 65 tower = { version = "0.5", optional = true } 66 + tower-http = { version = "0.6", optional = true, features = ["trace"] } 67 mime-sniffer = {version = "^0.1"} 68 chrono = { version = "0.4" } 69 serde = { version = "1.0" }
-1
crates/weaver-app/src/components/app_link.rs
··· 75 #[component] 76 pub fn AppLink(props: AppLinkProps) -> Element { 77 let link_mode = use_context::<LinkMode>(); 78 - tracing::info!(?link_mode, "AppLink: reading LinkMode context"); 79 let class = props.class.clone().unwrap_or_default(); 80 81 match link_mode {
··· 75 #[component] 76 pub fn AppLink(props: AppLinkProps) -> Element { 77 let link_mode = use_context::<LinkMode>(); 78 let class = props.class.clone().unwrap_or_default(); 79 80 match link_mode {
+85 -50
crates/weaver-app/src/main.rs
··· 45 // Filter out noisy crates 46 // Use weaver_app=trace for detailed editor debugging 47 let filter = EnvFilter::new( 48 - "debug,weaver_app=trace,loro_internal=warn,jacquard_identity=info,jacquard_common=info,iroh=info", 49 ); 50 51 let reg = Registry::default() ··· 56 let _ = set_global_default(reg); 57 } 58 59 #[cfg(feature = "server")] 60 std::panic::set_hook(Box::new(|panic_info| { 61 tracing::error!("PANIC: {:?}", panic_info); ··· 63 64 // Run `serve()` on the server only 65 #[cfg(feature = "server")] 66 - dioxus::serve(|| async move { 67 - #[cfg(feature = "fullstack-server")] 68 - use axum::middleware; 69 - use axum::middleware::Next; 70 - use axum::{Router, body::Body, extract::Request, response::Response, routing::get}; 71 - use axum_extra::extract::Host; 72 - use jacquard::oauth::{client::OAuthClient, session::ClientData}; 73 - use std::convert::Infallible; 74 - use weaver_app::auth::AuthStore; 75 - use weaver_app::blobcache::BlobCache; 76 77 - #[cfg(not(feature = "fullstack-server"))] 78 - let router = { Router::new().merge(dioxus::server::router(App)) }; 79 80 - #[cfg(feature = "fullstack-server")] 81 - let router = { 82 - let fetcher = Arc::new(fetch::Fetcher::new(OAuthClient::new( 83 - AuthStore::new(), 84 - ClientData::new_public(CONFIG.oauth.clone()), 85 - ))); 86 87 - let blob_cache = Arc::new(BlobCache::new(fetcher.clone())); 88 - axum::Router::new() 89 - .route("/favicon.ico", get(weaver_app::favicon)) 90 - .serve_dioxus_application(ServeConfig::builder(), App) 91 - // Host context resolution. 92 - .layer(middleware::from_fn({ 93 - let fetcher = fetcher.clone(); 94 - move |req: Request, next: Next| { 95 - let fetcher = fetcher.clone(); 96 - async move { 97 - weaver_app::middleware::host_context_middleware(req, next, fetcher) 98 - .await 99 - } 100 - } 101 - })) 102 - // Insert fetcher and blob cache into extensions. 
103 - .layer(middleware::from_fn({ 104 - let blob_cache = blob_cache.clone(); 105 - let fetcher = fetcher.clone(); 106 - move |mut req: Request, next: Next| { 107 - let blob_cache = blob_cache.clone(); 108 - let fetcher = fetcher.clone(); 109 - async move { 110 - req.extensions_mut().insert(blob_cache); 111 - req.extensions_mut().insert(fetcher); 112 - Ok::<_, Infallible>(next.run(req).await) 113 - } 114 - } 115 - })) 116 - }; 117 - Ok(router) 118 }); 119 120 #[cfg(not(feature = "server"))]
··· 45 // Filter out noisy crates 46 // Use weaver_app=trace for detailed editor debugging 47 let filter = EnvFilter::new( 48 + "debug,weaver_app=trace,loro_internal=warn,jacquard_identity=info,jacquard_common=info,iroh=info,reqwest=warn", 49 ); 50 51 let reg = Registry::default() ··· 56 let _ = set_global_default(reg); 57 } 58 59 + // Initialize telemetry (metrics + tracing) before server starts. 60 + // Loki task is spawned inside dioxus::serve where tokio runtime exists. 61 + // Wrapped in Arc<Mutex> so the FnMut closure can clone and take() on first call. 62 + #[cfg(feature = "server")] 63 + let loki_task = { 64 + use weaver_common::telemetry::{self, TelemetryConfig}; 65 + let config = TelemetryConfig::from_env("weaver-app"); 66 + std::sync::Arc::new(std::sync::Mutex::new(telemetry::init_sync(config))) 67 + }; 68 + 69 #[cfg(feature = "server")] 70 std::panic::set_hook(Box::new(|panic_info| { 71 tracing::error!("PANIC: {:?}", panic_info); ··· 73 74 // Run `serve()` on the server only 75 #[cfg(feature = "server")] 76 + dioxus::serve({ 77 + let loki_task = loki_task.clone(); 78 + move || { 79 + let loki_task = loki_task.clone(); 80 + async move { 81 + #[cfg(feature = "fullstack-server")] 82 + use axum::middleware; 83 + use axum::middleware::Next; 84 + use axum::{ 85 + Router, body::Body, extract::Request, response::Response, routing::get, 86 + }; 87 + use axum_extra::extract::Host; 88 + use jacquard::oauth::{client::OAuthClient, session::ClientData}; 89 + use std::convert::Infallible; 90 + use weaver_app::auth::AuthStore; 91 + use weaver_app::blobcache::BlobCache; 92 + use weaver_common::telemetry; 93 94 + // Spawn the Loki background task now that we're in tokio runtime 95 + if let Some(task) = loki_task.lock().unwrap().take() { 96 + telemetry::spawn_loki_task(task); 97 + } 98 99 + #[cfg(not(feature = "fullstack-server"))] 100 + let router = { 101 + Router::new() 102 + .merge(dioxus::server::router(App)) 103 + .layer(middleware::from_fn(telemetry::http_metrics)) 
104 + .layer(tower_http::trace::TraceLayer::new_for_http()) 105 + }; 106 107 + #[cfg(feature = "fullstack-server")] 108 + let router = { 109 + let fetcher = Arc::new(fetch::Fetcher::new(OAuthClient::new( 110 + AuthStore::new(), 111 + ClientData::new_public(CONFIG.oauth.clone()), 112 + ))); 113 + 114 + let blob_cache = Arc::new(BlobCache::new(fetcher.clone())); 115 + axum::Router::new() 116 + .route("/favicon.ico", get(weaver_app::favicon)) 117 + .route("/metrics", get(|| async { telemetry::render() })) 118 + .serve_dioxus_application(ServeConfig::builder(), App) 119 + // Host context resolution. 120 + .layer(middleware::from_fn({ 121 + let fetcher = fetcher.clone(); 122 + move |req: Request, next: Next| { 123 + let fetcher = fetcher.clone(); 124 + async move { 125 + weaver_app::middleware::host_context_middleware( 126 + req, next, fetcher, 127 + ) 128 + .await 129 + } 130 + } 131 + })) 132 + // Insert fetcher and blob cache into extensions. 133 + .layer(middleware::from_fn({ 134 + let blob_cache = blob_cache.clone(); 135 + let fetcher = fetcher.clone(); 136 + move |mut req: Request, next: Next| { 137 + let blob_cache = blob_cache.clone(); 138 + let fetcher = fetcher.clone(); 139 + async move { 140 + req.extensions_mut().insert(blob_cache); 141 + req.extensions_mut().insert(fetcher); 142 + Ok::<_, Infallible>(next.run(req).await) 143 + } 144 + } 145 + })) 146 + // HTTP metrics (request count, duration) 147 + .layer(middleware::from_fn(telemetry::http_metrics)) 148 + .layer(tower_http::trace::TraceLayer::new_for_http()) 149 + }; 150 + Ok(router) 151 + } 152 + } 153 }); 154 155 #[cfg(not(feature = "server"))]
+3 -2
crates/weaver-app/src/views/subdomain_navbar.rs
··· 98 } 99 } 100 } 101 - // Author profile link 102 nav { class: "nav-tools", 103 - AuthorProfileLink { ident: ctx.owner.clone() } 104 } 105 106 // Auth button
··· 98 } 99 } 100 } 101 + // Author profile link - temporarily disabled to debug SSR hang 102 nav { class: "nav-tools", 103 + // AuthorProfileLink { ident: ctx.owner.clone() } 104 + "DEBUG: navbar without author link" 105 } 106 107 // Auth button
+2 -1
crates/weaver-common/Cargo.toml
··· 10 native = ["jacquard/dns"] 11 use-index = [] 12 iroh = ["dep:iroh", "dep:iroh-gossip", "dep:iroh-tickets"] 13 - telemetry = ["dep:metrics", "dep:metrics-exporter-prometheus", "dep:tracing-subscriber", "dep:tracing-loki"] 14 cache = ["dep:mini-moka-wasm"] 15 perf = [] 16 ··· 51 metrics-exporter-prometheus = { version = "0.17.2", optional = true } 52 tracing-subscriber = { version = "0.3", features = ["env-filter", "fmt"], optional = true } 53 tracing-loki = { version = "0.2", optional = true } 54 55 getrandom = { version = "0.3", features = [] } 56 ring = { version = "0.17", default-features = false }
··· 10 native = ["jacquard/dns"] 11 use-index = [] 12 iroh = ["dep:iroh", "dep:iroh-gossip", "dep:iroh-tickets"] 13 + telemetry = ["dep:metrics", "dep:metrics-exporter-prometheus", "dep:tracing-subscriber", "dep:tracing-loki", "dep:axum"] 14 cache = ["dep:mini-moka-wasm"] 15 perf = [] 16 ··· 51 metrics-exporter-prometheus = { version = "0.17.2", optional = true } 52 tracing-subscriber = { version = "0.3", features = ["env-filter", "fmt"], optional = true } 53 tracing-loki = { version = "0.2", optional = true } 54 + axum = { version = "0.8", optional = true } 55 56 getrandom = { version = "0.3", features = [] } 57 ring = { version = "0.17", default-features = false }
+102 -10
crates/weaver-common/src/telemetry.rs
··· 67 } 68 } 69 70 /// Initialize telemetry (metrics + tracing). 71 /// 72 /// Call once at application startup. If `LOKI_URL` is set, spawns a background ··· 75 // Initialize prometheus metrics 76 init_metrics(); 77 78 - // Initialize tracing 79 - init_tracing(config).await; 80 } 81 82 /// Initialize just the prometheus metrics recorder. 83 pub fn init_metrics() -> &'static PrometheusHandle { 84 PROMETHEUS_HANDLE.get_or_init(|| { 85 PrometheusBuilder::new() 86 .install_recorder() 87 .expect("failed to install prometheus recorder") 88 }) 89 } 90 91 /// Initialize tracing with console + optional Loki layers. 92 - async fn init_tracing(config: TelemetryConfig) { 93 let env_filter = EnvFilter::try_from_default_env().unwrap_or_else(|_| { 94 - EnvFilter::new(format!( 95 - "{}", 96 - config.console_level.as_str().to_lowercase() 97 - )) 98 }); 99 100 // Pretty console layer for human-readable stdout ··· 121 .with(loki_layer) 122 .init(); 123 124 - // Spawn the background task that pushes to Loki 125 - tokio::spawn(loki_task); 126 - 127 tracing::info!( 128 service = %config.service_name, 129 loki_url = %loki_url, 130 "telemetry initialized with loki" 131 ); 132 } 133 Err(e) => { 134 // Invalid URL - fall back to console only ··· 139 loki_url = %loki_url, 140 "invalid LOKI_URL, falling back to console only" 141 ); 142 } 143 } 144 } else { ··· 149 service = %config.service_name, 150 "telemetry initialized (console only, set LOKI_URL to enable loki)" 151 ); 152 } 153 } 154 ··· 168 169 // Re-export the metrics crate for convenience 170 pub use metrics::{counter, gauge, histogram};
··· 67 } 68 } 69 70 + /// Opaque handle for the Loki background task. 71 + pub struct LokiTask(tracing_loki::BackgroundTask); 72 + 73 /// Initialize telemetry (metrics + tracing). 74 /// 75 /// Call once at application startup. If `LOKI_URL` is set, spawns a background ··· 78 // Initialize prometheus metrics 79 init_metrics(); 80 81 + // Initialize tracing subscriber 82 + if let Some(task) = init_tracing(config) { 83 + // Spawn the loki background task 84 + tokio::spawn(task.0); 85 + } 86 + } 87 + 88 + /// Initialize telemetry without spawning the Loki task. 89 + /// 90 + /// Use this when you need to set up tracing before a tokio runtime is available. 91 + /// Returns the Loki task if configured - caller must spawn it later with `spawn_loki_task`. 92 + pub fn init_sync(config: TelemetryConfig) -> Option<LokiTask> { 93 + init_metrics(); 94 + init_tracing(config) 95 + } 96 + 97 + /// Spawn the Loki background task. 98 + /// 99 + /// Call this inside a tokio runtime after `init_sync`. 100 + pub fn spawn_loki_task(task: LokiTask) { 101 + tokio::spawn(task.0); 102 } 103 104 /// Initialize just the prometheus metrics recorder. 105 pub fn init_metrics() -> &'static PrometheusHandle { 106 PROMETHEUS_HANDLE.get_or_init(|| { 107 + // HTTP request duration buckets (in seconds) 108 + let http_buckets = vec![ 109 + 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, 110 + ]; 111 + 112 PrometheusBuilder::new() 113 + .set_buckets_for_metric( 114 + metrics_exporter_prometheus::Matcher::Prefix("http_request_duration".to_string()), 115 + &http_buckets, 116 + ) 117 + .expect("failed to set histogram buckets") 118 .install_recorder() 119 .expect("failed to install prometheus recorder") 120 }) 121 } 122 123 /// Initialize tracing with console + optional Loki layers. 124 + /// 125 + /// Returns the Loki background task if Loki is configured. 
126 + fn init_tracing(config: TelemetryConfig) -> Option<LokiTask> { 127 let env_filter = EnvFilter::try_from_default_env().unwrap_or_else(|_| { 128 + EnvFilter::new(format!("{}", config.console_level.as_str().to_lowercase())) 129 }); 130 131 // Pretty console layer for human-readable stdout ··· 152 .with(loki_layer) 153 .init(); 154 155 tracing::info!( 156 service = %config.service_name, 157 loki_url = %loki_url, 158 "telemetry initialized with loki" 159 ); 160 + 161 + Some(LokiTask(loki_task)) 162 } 163 Err(e) => { 164 // Invalid URL - fall back to console only ··· 169 loki_url = %loki_url, 170 "invalid LOKI_URL, falling back to console only" 171 ); 172 + None 173 } 174 } 175 } else { ··· 180 service = %config.service_name, 181 "telemetry initialized (console only, set LOKI_URL to enable loki)" 182 ); 183 + None 184 } 185 } 186 ··· 200 201 // Re-export the metrics crate for convenience 202 pub use metrics::{counter, gauge, histogram}; 203 + 204 + /// HTTP metrics middleware for axum. 205 + /// 206 + /// Records `http_requests_total` counter and `http_request_duration_seconds` histogram. 207 + /// Use with `axum::middleware::from_fn`. 
///
/// # Example
/// ```ignore
/// use axum::middleware;
/// use weaver_common::telemetry::http_metrics;
///
/// let app = Router::new()
///     .route("/", get(handler))
///     .layer(middleware::from_fn(http_metrics));
/// ```
///
/// # Labels
/// - `http_requests_total`: `method`, `path`, `status`
/// - `http_request_duration_seconds`: `method`, `path`
///
/// `path` is the request URI path after `normalize_path` (at most three
/// leading segments are kept). The recorded duration is the wall-clock time,
/// in seconds, spent awaiting the rest of the middleware stack and handler.
#[cfg(feature = "telemetry")]
pub async fn http_metrics(
    req: axum::extract::Request,
    next: axum::middleware::Next,
) -> axum::response::Response {
    let start = std::time::Instant::now();

    // Capture the labels before `req` is moved into `next.run`.
    // The path is normalized exactly once and shared by both metrics.
    let method = req.method().to_string();
    let path = normalize_path(req.uri().path());

    let response = next.run(req).await;

    let duration = start.elapsed().as_secs_f64();
    let status = response.status().as_u16().to_string();

    metrics::counter!(
        "http_requests_total",
        "method" => method.clone(),
        "path" => path.clone(),
        "status" => status
    )
    .increment(1);

    metrics::histogram!(
        "http_request_duration_seconds",
        "method" => method,
        "path" => path
    )
    .record(duration);

    response
}

/// Normalize a request path for use as a metrics label.
///
/// Empty segments (leading, trailing or doubled `/`) are ignored. Up to the
/// first three non-empty segments are kept; if more follow, they are collapsed
/// into a single trailing `/*` to reduce label cardinality. A path with no
/// segments normalizes to `/`.
#[cfg(feature = "telemetry")]
fn normalize_path(path: &str) -> String {
    let mut segments = path.split('/').filter(|s| !s.is_empty());

    let mut label = String::with_capacity(path.len() + 2);
    // `by_ref` lets us keep using `segments` afterwards; `take(3)` stops
    // without consuming a fourth segment.
    for segment in segments.by_ref().take(3) {
        label.push('/');
        label.push_str(segment);
    }

    if label.is_empty() {
        // No non-empty segments at all: the root path.
        label.push('/');
    } else if segments.next().is_some() {
        // Something remains beyond the third segment: collapse it.
        label.push_str("/*");
    }

    label
}
+2 -1
crates/weaver-index/src/server.rs
··· 2 use std::sync::Arc; 3 4 use axum::{ 5 - Json, Router, 6 extract::State, 7 http::{StatusCode, header}, 8 response::{Html, IntoResponse}, ··· 166 .merge(GetEditHistoryRequest::into_router(edit::get_edit_history)) 167 .merge(GetContributorsRequest::into_router(edit::get_contributors)) 168 .merge(ListDraftsRequest::into_router(edit::list_drafts)) 169 .layer(TraceLayer::new_for_http()) 170 .layer(CorsLayer::permissive().max_age(std::time::Duration::from_secs(86400))) 171 .with_state(state)
··· 2 use std::sync::Arc; 3 4 use axum::{ 5 + Json, Router, middleware, 6 extract::State, 7 http::{StatusCode, header}, 8 response::{Html, IntoResponse}, ··· 166 .merge(GetEditHistoryRequest::into_router(edit::get_edit_history)) 167 .merge(GetContributorsRequest::into_router(edit::get_contributors)) 168 .merge(ListDraftsRequest::into_router(edit::list_drafts)) 169 + .layer(middleware::from_fn(telemetry::http_metrics)) 170 .layer(TraceLayer::new_for_http()) 171 .layer(CorsLayer::permissive().max_age(std::time::Duration::from_secs(86400))) 172 .with_state(state)
+58 -5
docker-compose.yml
··· 6 ports: 7 - "80:80" 8 - "443:443" 9 volumes: 10 - ./infra/caddy/Caddyfile:/etc/caddy/Caddyfile:ro 11 - caddy_data:/data 12 - caddy_config:/config 13 environment: 14 CLOUDFLARE_API_TOKEN: ${CLOUDFLARE_API_TOKEN} 15 - ACME_EMAIL: ${ACME_EMAIL:-admin@weaver.sh} 16 depends_on: 17 - weaver-app 18 - index 19 restart: unless-stopped 20 21 - # ClickHouse - analytics database (internal only, no host ports exposed) 22 clickhouse: 23 image: clickhouse/clickhouse-server:25.11 24 container_name: weaver-clickhouse 25 - # No ports exposed to host - only accessible via docker network 26 ports: 27 - "8123:8123" 28 - "9000:9000" 29 volumes: 30 - ~/data/clickhouse:/var/lib/clickhouse 31 - ~/data/clickhouse-logs:/var/log/clickhouse-server 32 - ~/data/clickhouse-config:/etc/clickhouse-server/config.d 33 environment: 34 CLICKHOUSE_DB: ${CLICKHOUSE_DATABASE:-weaver} 35 CLICKHOUSE_USER: ${CLICKHOUSE_USER:-default} ··· 62 image: ghcr.io/bluesky-social/indigo/tap:latest 63 ports: 64 - "2480:2480" 65 volumes: 66 - tap_data:/data/tap 67 environment: 68 TAP_DATABASE_URL: sqlite:///data/tap/tap.db 69 TAP_BIND: ":2480" 70 TAP_DISABLE_ACKS: "false" 71 - TAP_LOG_LEVEL: info 72 TAP_OUTBOX_PARALLELISM: 5 73 #TAP_FULL_NETWORK: true 74 #TAP_SIGNAL_COLLECTION: place.stream.chat.profile ··· 92 - index_data:/app/data 93 environment: 94 RUST_LOG: info,weaver_index=debug,hyper_util::client::legacy::pool=info 95 CLICKHOUSE_URL: http://clickhouse:8123 96 CLICKHOUSE_DATABASE: ${CLICKHOUSE_DATABASE:-weaver} 97 CLICKHOUSE_USER: ${CLICKHOUSE_USER:-default} ··· 122 environment: 123 PORT: 8080 124 IP: 0.0.0.0 125 - RUST_LOG: info 126 healthcheck: 127 test: ["CMD", "wget", "-q", "--spider", "http://localhost:8080/"] 128 interval: 20s 129 timeout: 5s 130 retries: 3 131 restart: unless-stopped 132 133 volumes:
··· 6 ports: 7 - "80:80" 8 - "443:443" 9 + - "2019:2019" # Admin API with metrics 10 volumes: 11 - ./infra/caddy/Caddyfile:/etc/caddy/Caddyfile:ro 12 - caddy_data:/data 13 - caddy_config:/config 14 environment: 15 CLOUDFLARE_API_TOKEN: ${CLOUDFLARE_API_TOKEN} 16 + ACME_EMAIL: ${ACME_EMAIL:-contact@weaver.sh} 17 depends_on: 18 - weaver-app 19 - index 20 restart: unless-stopped 21 22 + # ClickHouse - analytics database 23 clickhouse: 24 image: clickhouse/clickhouse-server:25.11 25 container_name: weaver-clickhouse 26 ports: 27 - "8123:8123" 28 - "9000:9000" 29 + - "9363:9363" # Prometheus metrics 30 volumes: 31 - ~/data/clickhouse:/var/lib/clickhouse 32 - ~/data/clickhouse-logs:/var/log/clickhouse-server 33 - ~/data/clickhouse-config:/etc/clickhouse-server/config.d 34 + - ./infra/clickhouse/prometheus.xml:/etc/clickhouse-server/config.d/prometheus.xml:ro 35 environment: 36 CLICKHOUSE_DB: ${CLICKHOUSE_DATABASE:-weaver} 37 CLICKHOUSE_USER: ${CLICKHOUSE_USER:-default} ··· 64 image: ghcr.io/bluesky-social/indigo/tap:latest 65 ports: 66 - "2480:2480" 67 + - "2481:2481" 68 volumes: 69 - tap_data:/data/tap 70 environment: 71 TAP_DATABASE_URL: sqlite:///data/tap/tap.db 72 TAP_BIND: ":2480" 73 + TAP_METRICS_LISTEN: ":2481" 74 TAP_DISABLE_ACKS: "false" 75 + TAP_LOG_LEVEL: debug 76 TAP_OUTBOX_PARALLELISM: 5 77 #TAP_FULL_NETWORK: true 78 #TAP_SIGNAL_COLLECTION: place.stream.chat.profile ··· 96 - index_data:/app/data 97 environment: 98 RUST_LOG: info,weaver_index=debug,hyper_util::client::legacy::pool=info 99 + LOKI_URL: ${LOKI_URL:-} 100 CLICKHOUSE_URL: http://clickhouse:8123 101 CLICKHOUSE_DATABASE: ${CLICKHOUSE_DATABASE:-weaver} 102 CLICKHOUSE_USER: ${CLICKHOUSE_USER:-default} ··· 127 environment: 128 PORT: 8080 129 IP: 0.0.0.0 130 + RUST_LOG: info,weaver-app=debug,weaver-common=debug,hyper=warn,hyper_util=warn,tower=warn,h2=warn,rustls=warn,reqwest=info,dioxus_core=warn,dioxus_signals=warn 131 + LOKI_URL: ${LOKI_URL:-} 132 healthcheck: 133 test: ["CMD", "wget", "-q", 
"--spider", "http://localhost:8080/"] 134 interval: 20s 135 timeout: 5s 136 retries: 3 137 + restart: unless-stopped 138 + 139 + # ============ OBSERVABILITY STACK ============ 140 + 141 + # Node exporter - host metrics (CPU, memory, disk, network) 142 + node-exporter: 143 + image: prom/node-exporter:latest 144 + container_name: weaver-node-exporter 145 + ports: 146 + - "9100:9100" 147 + volumes: 148 + - /proc:/host/proc:ro 149 + - /sys:/host/sys:ro 150 + - /:/rootfs:ro 151 + command: 152 + - "--path.procfs=/host/proc" 153 + - "--path.sysfs=/host/sys" 154 + - "--path.rootfs=/rootfs" 155 + - "--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)" 156 + restart: unless-stopped 157 + 158 + # cAdvisor - container metrics (per-container CPU, memory, network) 159 + cadvisor: 160 + image: gcr.io/cadvisor/cadvisor:latest 161 + container_name: weaver-cadvisor 162 + ports: 163 + - "9080:8080" 164 + volumes: 165 + - /:/rootfs:ro 166 + - /var/run:/var/run:ro 167 + - /sys:/sys:ro 168 + - /var/lib/docker/:/var/lib/docker:ro 169 + - /dev/disk/:/dev/disk:ro 170 + privileged: true 171 + restart: unless-stopped 172 + 173 + # Promtail - ship container logs to Loki 174 + promtail: 175 + image: grafana/promtail:latest 176 + container_name: weaver-promtail 177 + volumes: 178 + - ./infra/promtail/config.yml:/etc/promtail/config.yml:ro 179 + - /var/lib/docker/containers:/var/lib/docker/containers:ro 180 + - /var/run/docker.sock:/var/run/docker.sock:ro 181 + command: -config.file=/etc/promtail/config.yml -config.expand-env=true 182 + environment: 183 + LOKI_URL: ${LOKI_URL:-http://localhost:3100} 184 restart: unless-stopped 185 186 volumes:
+6
infra/caddy/Caddyfile
··· 4 on_demand_tls { 5 ask http://index:3000/internal/verify-domain 6 } 7 } 8 9 # Index service
··· 4 on_demand_tls { 5 ask http://index:3000/internal/verify-domain 6 } 7 + 8 + admin 0.0.0.0:2019 9 + 10 + servers { 11 + metrics 12 + } 13 } 14 15 # Index service
+9
infra/clickhouse/prometheus.xml
···
··· 1 + <clickhouse> 2 + <prometheus> 3 + <endpoint>/metrics</endpoint> 4 + <port>9363</port> 5 + <metrics>true</metrics> 6 + <events>true</events> 7 + <asynchronous_metrics>true</asynchronous_metrics> 8 + </prometheus> 9 + </clickhouse>
+414
infra/grafana/dashboards/weaver-clickhouse.json
···
··· 1 + { 2 + "annotations": { "list": [] }, 3 + "editable": true, 4 + "fiscalYearStartMonth": 0, 5 + "graphTooltip": 1, 6 + "links": [], 7 + "panels": [ 8 + { 9 + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, 10 + "id": 100, 11 + "title": "ClickHouse Overview", 12 + "type": "row" 13 + }, 14 + { 15 + "datasource": { "type": "prometheus", "uid": "${datasource}" }, 16 + "fieldConfig": { 17 + "defaults": { 18 + "color": { "mode": "thresholds" }, 19 + "mappings": [], 20 + "thresholds": { 21 + "mode": "absolute", 22 + "steps": [ 23 + { "color": "red", "value": null }, 24 + { "color": "green", "value": 1 } 25 + ] 26 + } 27 + } 28 + }, 29 + "gridPos": { "h": 4, "w": 4, "x": 0, "y": 1 }, 30 + "id": 1, 31 + "options": { 32 + "colorMode": "background", 33 + "graphMode": "none", 34 + "justifyMode": "auto", 35 + "orientation": "auto", 36 + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false } 37 + }, 38 + "targets": [ 39 + { 40 + "expr": "up{service=\"weaver-clickhouse\"}", 41 + "refId": "A" 42 + } 43 + ], 44 + "title": "Status", 45 + "type": "stat" 46 + }, 47 + { 48 + "datasource": { "type": "prometheus", "uid": "${datasource}" }, 49 + "fieldConfig": { 50 + "defaults": { 51 + "color": { "mode": "thresholds" }, 52 + "mappings": [], 53 + "thresholds": { 54 + "mode": "absolute", 55 + "steps": [ 56 + { "color": "green", "value": null }, 57 + { "color": "yellow", "value": 50 }, 58 + { "color": "red", "value": 100 } 59 + ] 60 + } 61 + } 62 + }, 63 + "gridPos": { "h": 4, "w": 4, "x": 4, "y": 1 }, 64 + "id": 2, 65 + "options": { 66 + "colorMode": "value", 67 + "graphMode": "area", 68 + "justifyMode": "auto", 69 + "orientation": "auto", 70 + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false } 71 + }, 72 + "targets": [ 73 + { 74 + "expr": "ClickHouseMetrics_Query{service=\"weaver-clickhouse\"}", 75 + "refId": "A" 76 + } 77 + ], 78 + "title": "Active Queries", 79 + "type": "stat" 80 + }, 81 + { 82 + "datasource": { "type": "prometheus", 
"uid": "${datasource}" }, 83 + "fieldConfig": { 84 + "defaults": { 85 + "color": { "mode": "thresholds" }, 86 + "mappings": [], 87 + "unit": "bytes", 88 + "thresholds": { 89 + "mode": "absolute", 90 + "steps": [ 91 + { "color": "green", "value": null }, 92 + { "color": "yellow", "value": 4294967296 }, 93 + { "color": "red", "value": 8589934592 } 94 + ] 95 + } 96 + } 97 + }, 98 + "gridPos": { "h": 4, "w": 4, "x": 8, "y": 1 }, 99 + "id": 3, 100 + "options": { 101 + "colorMode": "value", 102 + "graphMode": "area", 103 + "justifyMode": "auto", 104 + "orientation": "auto", 105 + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false } 106 + }, 107 + "targets": [ 108 + { 109 + "expr": "ClickHouseMetrics_MemoryTracking{service=\"weaver-clickhouse\"}", 110 + "refId": "A" 111 + } 112 + ], 113 + "title": "Memory Used", 114 + "type": "stat" 115 + }, 116 + { 117 + "datasource": { "type": "prometheus", "uid": "${datasource}" }, 118 + "fieldConfig": { 119 + "defaults": { 120 + "color": { "mode": "thresholds" }, 121 + "mappings": [], 122 + "thresholds": { 123 + "mode": "absolute", 124 + "steps": [ 125 + { "color": "green", "value": null }, 126 + { "color": "yellow", "value": 50 }, 127 + { "color": "red", "value": 100 } 128 + ] 129 + } 130 + } 131 + }, 132 + "gridPos": { "h": 4, "w": 4, "x": 12, "y": 1 }, 133 + "id": 4, 134 + "options": { 135 + "colorMode": "value", 136 + "graphMode": "area", 137 + "justifyMode": "auto", 138 + "orientation": "auto", 139 + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false } 140 + }, 141 + "targets": [ 142 + { 143 + "expr": "ClickHouseMetrics_TCPConnection{service=\"weaver-clickhouse\"}", 144 + "refId": "A" 145 + } 146 + ], 147 + "title": "TCP Connections", 148 + "type": "stat" 149 + }, 150 + { 151 + "datasource": { "type": "prometheus", "uid": "${datasource}" }, 152 + "fieldConfig": { 153 + "defaults": { 154 + "color": { "mode": "thresholds" }, 155 + "mappings": [], 156 + "thresholds": { 157 + "mode": 
"absolute", 158 + "steps": [ 159 + { "color": "green", "value": null }, 160 + { "color": "yellow", "value": 50 }, 161 + { "color": "red", "value": 100 } 162 + ] 163 + } 164 + } 165 + }, 166 + "gridPos": { "h": 4, "w": 4, "x": 16, "y": 1 }, 167 + "id": 5, 168 + "options": { 169 + "colorMode": "value", 170 + "graphMode": "area", 171 + "justifyMode": "auto", 172 + "orientation": "auto", 173 + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false } 174 + }, 175 + "targets": [ 176 + { 177 + "expr": "ClickHouseMetrics_HTTPConnection{service=\"weaver-clickhouse\"}", 178 + "refId": "A" 179 + } 180 + ], 181 + "title": "HTTP Connections", 182 + "type": "stat" 183 + }, 184 + { 185 + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 5 }, 186 + "id": 101, 187 + "title": "Query Performance", 188 + "type": "row" 189 + }, 190 + { 191 + "datasource": { "type": "prometheus", "uid": "${datasource}" }, 192 + "fieldConfig": { 193 + "defaults": { 194 + "color": { "mode": "palette-classic" }, 195 + "unit": "short" 196 + } 197 + }, 198 + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 6 }, 199 + "id": 6, 200 + "options": { 201 + "legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom" }, 202 + "tooltip": { "mode": "multi" } 203 + }, 204 + "targets": [ 205 + { 206 + "expr": "rate(ClickHouseProfileEvents_Query{service=\"weaver-clickhouse\"}[5m])", 207 + "legendFormat": "Queries/s", 208 + "refId": "A" 209 + }, 210 + { 211 + "expr": "rate(ClickHouseProfileEvents_SelectQuery{service=\"weaver-clickhouse\"}[5m])", 212 + "legendFormat": "Selects/s", 213 + "refId": "B" 214 + }, 215 + { 216 + "expr": "rate(ClickHouseProfileEvents_InsertQuery{service=\"weaver-clickhouse\"}[5m])", 217 + "legendFormat": "Inserts/s", 218 + "refId": "C" 219 + } 220 + ], 221 + "title": "Query Rate", 222 + "type": "timeseries" 223 + }, 224 + { 225 + "datasource": { "type": "prometheus", "uid": "${datasource}" }, 226 + "fieldConfig": { 227 + "defaults": { 228 + "color": { "mode": 
"palette-classic" }, 229 + "unit": "short" 230 + } 231 + }, 232 + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 6 }, 233 + "id": 7, 234 + "options": { 235 + "legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom" }, 236 + "tooltip": { "mode": "multi" } 237 + }, 238 + "targets": [ 239 + { 240 + "expr": "rate(ClickHouseProfileEvents_FailedQuery{service=\"weaver-clickhouse\"}[5m])", 241 + "legendFormat": "Failed", 242 + "refId": "A" 243 + }, 244 + { 245 + "expr": "rate(ClickHouseProfileEvents_FailedSelectQuery{service=\"weaver-clickhouse\"}[5m])", 246 + "legendFormat": "Failed Selects", 247 + "refId": "B" 248 + }, 249 + { 250 + "expr": "rate(ClickHouseProfileEvents_FailedInsertQuery{service=\"weaver-clickhouse\"}[5m])", 251 + "legendFormat": "Failed Inserts", 252 + "refId": "C" 253 + } 254 + ], 255 + "title": "Failed Queries", 256 + "type": "timeseries" 257 + }, 258 + { 259 + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 14 }, 260 + "id": 102, 261 + "title": "Resources", 262 + "type": "row" 263 + }, 264 + { 265 + "datasource": { "type": "prometheus", "uid": "${datasource}" }, 266 + "fieldConfig": { 267 + "defaults": { 268 + "color": { "mode": "palette-classic" }, 269 + "unit": "bytes" 270 + } 271 + }, 272 + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 15 }, 273 + "id": 8, 274 + "options": { 275 + "legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom" }, 276 + "tooltip": { "mode": "multi" } 277 + }, 278 + "targets": [ 279 + { 280 + "expr": "ClickHouseMetrics_MemoryTracking{service=\"weaver-clickhouse\"}", 281 + "legendFormat": "Memory Tracking", 282 + "refId": "A" 283 + }, 284 + { 285 + "expr": "ClickHouseAsyncMetrics_MemoryResident{service=\"weaver-clickhouse\"}", 286 + "legendFormat": "Resident", 287 + "refId": "B" 288 + } 289 + ], 290 + "title": "Memory Usage", 291 + "type": "timeseries" 292 + }, 293 + { 294 + "datasource": { "type": "prometheus", "uid": "${datasource}" }, 295 + "fieldConfig": { 296 + "defaults": { 
297 + "color": { "mode": "palette-classic" }, 298 + "unit": "Bps" 299 + } 300 + }, 301 + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 15 }, 302 + "id": 9, 303 + "options": { 304 + "legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom" }, 305 + "tooltip": { "mode": "multi" } 306 + }, 307 + "targets": [ 308 + { 309 + "expr": "rate(ClickHouseProfileEvents_ReadBufferFromFileDescriptorReadBytes{service=\"weaver-clickhouse\"}[5m])", 310 + "legendFormat": "Read", 311 + "refId": "A" 312 + }, 313 + { 314 + "expr": "rate(ClickHouseProfileEvents_WriteBufferFromFileDescriptorWriteBytes{service=\"weaver-clickhouse\"}[5m])", 315 + "legendFormat": "Write", 316 + "refId": "B" 317 + } 318 + ], 319 + "title": "Disk I/O", 320 + "type": "timeseries" 321 + }, 322 + { 323 + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 23 }, 324 + "id": 103, 325 + "title": "Merges & Parts", 326 + "type": "row" 327 + }, 328 + { 329 + "datasource": { "type": "prometheus", "uid": "${datasource}" }, 330 + "fieldConfig": { 331 + "defaults": { 332 + "color": { "mode": "palette-classic" }, 333 + "unit": "short" 334 + } 335 + }, 336 + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 24 }, 337 + "id": 10, 338 + "options": { 339 + "legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom" }, 340 + "tooltip": { "mode": "multi" } 341 + }, 342 + "targets": [ 343 + { 344 + "expr": "ClickHouseMetrics_Merge{service=\"weaver-clickhouse\"}", 345 + "legendFormat": "Active Merges", 346 + "refId": "A" 347 + }, 348 + { 349 + "expr": "ClickHouseMetrics_BackgroundMergesAndMutationsPoolTask{service=\"weaver-clickhouse\"}", 350 + "legendFormat": "Pool Tasks", 351 + "refId": "B" 352 + } 353 + ], 354 + "title": "Merge Activity", 355 + "type": "timeseries" 356 + }, 357 + { 358 + "datasource": { "type": "prometheus", "uid": "${datasource}" }, 359 + "fieldConfig": { 360 + "defaults": { 361 + "color": { "mode": "palette-classic" }, 362 + "unit": "short" 363 + } 364 + }, 365 + "gridPos": { 
"h": 8, "w": 12, "x": 12, "y": 24 }, 366 + "id": 11, 367 + "options": { 368 + "legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom" }, 369 + "tooltip": { "mode": "multi" } 370 + }, 371 + "targets": [ 372 + { 373 + "expr": "ClickHouseMetrics_PartsActive{service=\"weaver-clickhouse\"}", 374 + "legendFormat": "Active Parts", 375 + "refId": "A" 376 + }, 377 + { 378 + "expr": "ClickHouseMetrics_PartsOutdated{service=\"weaver-clickhouse\"}", 379 + "legendFormat": "Outdated Parts", 380 + "refId": "B" 381 + } 382 + ], 383 + "title": "Parts", 384 + "type": "timeseries" 385 + } 386 + ], 387 + "schemaVersion": 39, 388 + "tags": ["weaver", "clickhouse"], 389 + "templating": { 390 + "list": [ 391 + { 392 + "current": { "selected": false, "text": "Prometheus", "value": "Prometheus" }, 393 + "hide": 0, 394 + "includeAll": false, 395 + "label": "Datasource", 396 + "multi": false, 397 + "name": "datasource", 398 + "options": [], 399 + "query": "prometheus", 400 + "queryValue": "", 401 + "refresh": 1, 402 + "regex": "", 403 + "skipUrlSync": false, 404 + "type": "datasource" 405 + } 406 + ] 407 + }, 408 + "time": { "from": "now-1h", "to": "now" }, 409 + "timepicker": {}, 410 + "timezone": "browser", 411 + "title": "Weaver ClickHouse", 412 + "uid": "weaver-clickhouse", 413 + "version": 1 414 + }
+294
infra/grafana/dashboards/weaver-infra.json
···
··· 1 + { 2 + "annotations": { "list": [] }, 3 + "editable": true, 4 + "fiscalYearStartMonth": 0, 5 + "graphTooltip": 1, 6 + "links": [], 7 + "panels": [ 8 + { 9 + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, 10 + "id": 100, 11 + "title": "Host Metrics (booskie-box)", 12 + "type": "row" 13 + }, 14 + { 15 + "datasource": { "type": "prometheus", "uid": "${datasource}" }, 16 + "fieldConfig": { 17 + "defaults": { 18 + "color": { "mode": "palette-classic" }, 19 + "unit": "percentunit", 20 + "min": 0, 21 + "max": 1 22 + } 23 + }, 24 + "gridPos": { "h": 6, "w": 8, "x": 0, "y": 1 }, 25 + "id": 1, 26 + "options": { 27 + "legend": { "displayMode": "list", "placement": "bottom" }, 28 + "tooltip": { "mode": "multi" } 29 + }, 30 + "targets": [ 31 + { 32 + "expr": "1 - avg(rate(node_cpu_seconds_total{service=\"weaver-node\", mode=\"idle\"}[5m]))", 33 + "legendFormat": "CPU Usage", 34 + "refId": "A" 35 + } 36 + ], 37 + "title": "CPU Usage", 38 + "type": "timeseries" 39 + }, 40 + { 41 + "datasource": { "type": "prometheus", "uid": "${datasource}" }, 42 + "fieldConfig": { 43 + "defaults": { 44 + "color": { "mode": "palette-classic" }, 45 + "unit": "bytes" 46 + } 47 + }, 48 + "gridPos": { "h": 6, "w": 8, "x": 8, "y": 1 }, 49 + "id": 2, 50 + "options": { 51 + "legend": { "displayMode": "list", "placement": "bottom" }, 52 + "tooltip": { "mode": "multi" } 53 + }, 54 + "targets": [ 55 + { 56 + "expr": "node_memory_MemTotal_bytes{service=\"weaver-node\"} - node_memory_MemAvailable_bytes{service=\"weaver-node\"}", 57 + "legendFormat": "Used", 58 + "refId": "A" 59 + }, 60 + { 61 + "expr": "node_memory_MemAvailable_bytes{service=\"weaver-node\"}", 62 + "legendFormat": "Available", 63 + "refId": "B" 64 + } 65 + ], 66 + "title": "Memory", 67 + "type": "timeseries" 68 + }, 69 + { 70 + "datasource": { "type": "prometheus", "uid": "${datasource}" }, 71 + "fieldConfig": { 72 + "defaults": { 73 + "color": { "mode": "palette-classic" }, 74 + "unit": "percentunit", 75 + "min": 0, 76 + "max": 1 77 
+ } 78 + }, 79 + "gridPos": { "h": 6, "w": 8, "x": 16, "y": 1 }, 80 + "id": 3, 81 + "options": { 82 + "legend": { "displayMode": "list", "placement": "bottom" }, 83 + "tooltip": { "mode": "multi" } 84 + }, 85 + "targets": [ 86 + { 87 + "expr": "1 - (node_filesystem_avail_bytes{service=\"weaver-node\", mountpoint=\"/\"} / node_filesystem_size_bytes{service=\"weaver-node\", mountpoint=\"/\"})", 88 + "legendFormat": "/ usage", 89 + "refId": "A" 90 + } 91 + ], 92 + "title": "Disk Usage", 93 + "type": "timeseries" 94 + }, 95 + { 96 + "datasource": { "type": "prometheus", "uid": "${datasource}" }, 97 + "fieldConfig": { 98 + "defaults": { 99 + "color": { "mode": "palette-classic" }, 100 + "unit": "Bps" 101 + } 102 + }, 103 + "gridPos": { "h": 6, "w": 12, "x": 0, "y": 7 }, 104 + "id": 4, 105 + "options": { 106 + "legend": { "displayMode": "list", "placement": "bottom" }, 107 + "tooltip": { "mode": "multi" } 108 + }, 109 + "targets": [ 110 + { 111 + "expr": "rate(node_network_receive_bytes_total{service=\"weaver-node\", device!~\"lo|veth.*|docker.*|br-.*\"}[5m])", 112 + "legendFormat": "{{device}} rx", 113 + "refId": "A" 114 + }, 115 + { 116 + "expr": "-rate(node_network_transmit_bytes_total{service=\"weaver-node\", device!~\"lo|veth.*|docker.*|br-.*\"}[5m])", 117 + "legendFormat": "{{device}} tx", 118 + "refId": "B" 119 + } 120 + ], 121 + "title": "Network I/O", 122 + "type": "timeseries" 123 + }, 124 + { 125 + "datasource": { "type": "prometheus", "uid": "${datasource}" }, 126 + "fieldConfig": { 127 + "defaults": { 128 + "color": { "mode": "palette-classic" }, 129 + "unit": "Bps" 130 + } 131 + }, 132 + "gridPos": { "h": 6, "w": 12, "x": 12, "y": 7 }, 133 + "id": 5, 134 + "options": { 135 + "legend": { "displayMode": "list", "placement": "bottom" }, 136 + "tooltip": { "mode": "multi" } 137 + }, 138 + "targets": [ 139 + { 140 + "expr": "rate(node_disk_read_bytes_total{service=\"weaver-node\"}[5m])", 141 + "legendFormat": "{{device}} read", 142 + "refId": "A" 143 + }, 144 + 
{ 145 + "expr": "-rate(node_disk_written_bytes_total{service=\"weaver-node\"}[5m])", 146 + "legendFormat": "{{device}} write", 147 + "refId": "B" 148 + } 149 + ], 150 + "title": "Disk I/O", 151 + "type": "timeseries" 152 + }, 153 + { 154 + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 13 }, 155 + "id": 101, 156 + "title": "Container Metrics", 157 + "type": "row" 158 + }, 159 + { 160 + "datasource": { "type": "prometheus", "uid": "${datasource}" }, 161 + "fieldConfig": { 162 + "defaults": { 163 + "color": { "mode": "palette-classic" }, 164 + "unit": "percentunit" 165 + } 166 + }, 167 + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 14 }, 168 + "id": 6, 169 + "options": { 170 + "legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom" }, 171 + "tooltip": { "mode": "multi" } 172 + }, 173 + "targets": [ 174 + { 175 + "expr": "rate(container_cpu_usage_seconds_total{service=\"weaver-cadvisor\", name=~\"weaver-.*\"}[5m])", 176 + "legendFormat": "{{name}}", 177 + "refId": "A" 178 + } 179 + ], 180 + "title": "Container CPU Usage", 181 + "type": "timeseries" 182 + }, 183 + { 184 + "datasource": { "type": "prometheus", "uid": "${datasource}" }, 185 + "fieldConfig": { 186 + "defaults": { 187 + "color": { "mode": "palette-classic" }, 188 + "unit": "bytes" 189 + } 190 + }, 191 + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 14 }, 192 + "id": 7, 193 + "options": { 194 + "legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom" }, 195 + "tooltip": { "mode": "multi" } 196 + }, 197 + "targets": [ 198 + { 199 + "expr": "container_memory_usage_bytes{service=\"weaver-cadvisor\", name=~\"weaver-.*\"}", 200 + "legendFormat": "{{name}}", 201 + "refId": "A" 202 + } 203 + ], 204 + "title": "Container Memory Usage", 205 + "type": "timeseries" 206 + }, 207 + { 208 + "datasource": { "type": "prometheus", "uid": "${datasource}" }, 209 + "fieldConfig": { 210 + "defaults": { 211 + "color": { "mode": "palette-classic" }, 212 + "unit": "Bps" 213 + } 214 + 
}, 215 + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 22 }, 216 + "id": 8, 217 + "options": { 218 + "legend": { "calcs": ["mean"], "displayMode": "table", "placement": "bottom" }, 219 + "tooltip": { "mode": "multi" } 220 + }, 221 + "targets": [ 222 + { 223 + "expr": "rate(container_network_receive_bytes_total{service=\"weaver-cadvisor\", name=~\"weaver-.*\"}[5m])", 224 + "legendFormat": "{{name}} rx", 225 + "refId": "A" 226 + }, 227 + { 228 + "expr": "-rate(container_network_transmit_bytes_total{service=\"weaver-cadvisor\", name=~\"weaver-.*\"}[5m])", 229 + "legendFormat": "{{name}} tx", 230 + "refId": "B" 231 + } 232 + ], 233 + "title": "Container Network I/O", 234 + "type": "timeseries" 235 + }, 236 + { 237 + "datasource": { "type": "prometheus", "uid": "${datasource}" }, 238 + "fieldConfig": { 239 + "defaults": { 240 + "color": { "mode": "palette-classic" }, 241 + "unit": "short" 242 + } 243 + }, 244 + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 22 }, 245 + "id": 9, 246 + "options": { 247 + "legend": { "displayMode": "table", "placement": "bottom" }, 248 + "tooltip": { "mode": "multi" } 249 + }, 250 + "targets": [ 251 + { 252 + "expr": "container_last_seen{service=\"weaver-cadvisor\", name=~\"weaver-.*\"} - time()", 253 + "legendFormat": "{{name}}", 254 + "refId": "A", 255 + "hide": true 256 + }, 257 + { 258 + "expr": "count(container_last_seen{service=\"weaver-cadvisor\", name=~\"weaver-.*\"} > (time() - 60))", 259 + "legendFormat": "Running containers", 260 + "refId": "B" 261 + } 262 + ], 263 + "title": "Running Containers", 264 + "type": "stat" 265 + } 266 + ], 267 + "schemaVersion": 39, 268 + "tags": ["weaver", "infrastructure"], 269 + "templating": { 270 + "list": [ 271 + { 272 + "current": { "selected": false, "text": "Prometheus", "value": "Prometheus" }, 273 + "hide": 0, 274 + "includeAll": false, 275 + "label": "Datasource", 276 + "multi": false, 277 + "name": "datasource", 278 + "options": [], 279 + "query": "prometheus", 280 + "queryValue": "", 281 + 
"refresh": 1, 282 + "regex": "", 283 + "skipUrlSync": false, 284 + "type": "datasource" 285 + } 286 + ] 287 + }, 288 + "time": { "from": "now-1h", "to": "now" }, 289 + "timepicker": {}, 290 + "timezone": "browser", 291 + "title": "Weaver Infrastructure", 292 + "uid": "weaver-infra", 293 + "version": 1 294 + }
+943
infra/grafana/dashboards/weaver-overview.json
···
··· 1 + { 2 + "annotations": { 3 + "list": [ 4 + { 5 + "builtIn": 1, 6 + "datasource": { 7 + "type": "grafana", 8 + "uid": "-- Grafana --" 9 + }, 10 + "enable": true, 11 + "hide": true, 12 + "iconColor": "rgba(0, 211, 255, 1)", 13 + "name": "Annotations & Alerts", 14 + "type": "dashboard" 15 + } 16 + ] 17 + }, 18 + "editable": true, 19 + "fiscalYearStartMonth": 0, 20 + "graphTooltip": 1, 21 + "id": 0, 22 + "links": [], 23 + "panels": [ 24 + { 25 + "collapsed": false, 26 + "gridPos": { 27 + "h": 1, 28 + "w": 24, 29 + "x": 0, 30 + "y": 0 31 + }, 32 + "id": 100, 33 + "panels": [], 34 + "title": "Weaver Services", 35 + "type": "row" 36 + }, 37 + { 38 + "datasource": { 39 + "type": "prometheus", 40 + "uid": "${datasource}" 41 + }, 42 + "fieldConfig": { 43 + "defaults": { 44 + "color": { 45 + "mode": "palette-classic" 46 + }, 47 + "custom": { 48 + "axisBorderShow": false, 49 + "axisCenteredZero": false, 50 + "axisColorMode": "text", 51 + "axisLabel": "", 52 + "axisPlacement": "auto", 53 + "barAlignment": 0, 54 + "barWidthFactor": 0.6, 55 + "drawStyle": "line", 56 + "fillOpacity": 0, 57 + "gradientMode": "none", 58 + "hideFrom": { 59 + "legend": false, 60 + "tooltip": false, 61 + "viz": false 62 + }, 63 + "insertNulls": false, 64 + "lineInterpolation": "linear", 65 + "lineWidth": 1, 66 + "pointSize": 5, 67 + "scaleDistribution": { 68 + "type": "linear" 69 + }, 70 + "showPoints": "auto", 71 + "showValues": false, 72 + "spanNulls": false, 73 + "stacking": { 74 + "group": "A", 75 + "mode": "none" 76 + }, 77 + "thresholdsStyle": { 78 + "mode": "off" 79 + } 80 + }, 81 + "mappings": [], 82 + "thresholds": { 83 + "mode": "absolute", 84 + "steps": [ 85 + { 86 + "color": "green", 87 + "value": 0 88 + } 89 + ] 90 + }, 91 + "unit": "reqps" 92 + }, 93 + "overrides": [] 94 + }, 95 + "gridPos": { 96 + "h": 8, 97 + "w": 12, 98 + "x": 0, 99 + "y": 1 100 + }, 101 + "id": 1, 102 + "options": { 103 + "legend": { 104 + "calcs": ["mean", "max"], 105 + "displayMode": "table", 106 + 
"placement": "bottom", 107 + "showLegend": true 108 + }, 109 + "tooltip": { 110 + "hideZeros": false, 111 + "mode": "multi", 112 + "sort": "none" 113 + } 114 + }, 115 + "pluginVersion": "12.3.1", 116 + "targets": [ 117 + { 118 + "expr": "rate(http_requests_total{service=~\"weaver-.*\"}[5m])", 119 + "legendFormat": "{{service}} {{method}} {{status}}", 120 + "refId": "A" 121 + } 122 + ], 123 + "title": "Request Rate", 124 + "type": "timeseries" 125 + }, 126 + { 127 + "datasource": { 128 + "type": "prometheus", 129 + "uid": "${datasource}" 130 + }, 131 + "fieldConfig": { 132 + "defaults": { 133 + "color": { 134 + "mode": "palette-classic" 135 + }, 136 + "custom": { 137 + "axisBorderShow": false, 138 + "axisCenteredZero": false, 139 + "axisColorMode": "text", 140 + "axisLabel": "", 141 + "axisPlacement": "auto", 142 + "barAlignment": 0, 143 + "barWidthFactor": 0.6, 144 + "drawStyle": "line", 145 + "fillOpacity": 0, 146 + "gradientMode": "none", 147 + "hideFrom": { 148 + "legend": false, 149 + "tooltip": false, 150 + "viz": false 151 + }, 152 + "insertNulls": false, 153 + "lineInterpolation": "linear", 154 + "lineWidth": 1, 155 + "pointSize": 5, 156 + "scaleDistribution": { 157 + "type": "linear" 158 + }, 159 + "showPoints": "auto", 160 + "showValues": false, 161 + "spanNulls": false, 162 + "stacking": { 163 + "group": "A", 164 + "mode": "none" 165 + }, 166 + "thresholdsStyle": { 167 + "mode": "off" 168 + } 169 + }, 170 + "mappings": [], 171 + "thresholds": { 172 + "mode": "absolute", 173 + "steps": [ 174 + { 175 + "color": "green", 176 + "value": 0 177 + }, 178 + { 179 + "color": "yellow", 180 + "value": 0.5 181 + }, 182 + { 183 + "color": "red", 184 + "value": 1 185 + } 186 + ] 187 + }, 188 + "unit": "s" 189 + }, 190 + "overrides": [] 191 + }, 192 + "gridPos": { 193 + "h": 8, 194 + "w": 12, 195 + "x": 12, 196 + "y": 1 197 + }, 198 + "id": 2, 199 + "options": { 200 + "legend": { 201 + "calcs": ["mean", "max", "p99"], 202 + "displayMode": "table", 203 + "placement": 
"bottom", 204 + "showLegend": true 205 + }, 206 + "tooltip": { 207 + "hideZeros": false, 208 + "mode": "multi", 209 + "sort": "none" 210 + } 211 + }, 212 + "pluginVersion": "12.3.1", 213 + "targets": [ 214 + { 215 + "expr": "histogram_quantile(0.99, rate(http_request_duration_seconds_bucket{service=~\"weaver-.*\"}[5m]))", 216 + "legendFormat": "{{service}} p99", 217 + "refId": "A" 218 + }, 219 + { 220 + "expr": "histogram_quantile(0.50, rate(http_request_duration_seconds_bucket{service=~\"weaver-.*\"}[5m]))", 221 + "legendFormat": "{{service}} p50", 222 + "refId": "B" 223 + } 224 + ], 225 + "title": "Request Latency", 226 + "type": "timeseries" 227 + }, 228 + { 229 + "datasource": { 230 + "type": "prometheus", 231 + "uid": "${datasource}" 232 + }, 233 + "fieldConfig": { 234 + "defaults": { 235 + "color": { 236 + "mode": "palette-classic" 237 + }, 238 + "custom": { 239 + "axisBorderShow": false, 240 + "axisCenteredZero": false, 241 + "axisColorMode": "text", 242 + "axisLabel": "", 243 + "axisPlacement": "auto", 244 + "barAlignment": 0, 245 + "barWidthFactor": 0.6, 246 + "drawStyle": "line", 247 + "fillOpacity": 0, 248 + "gradientMode": "none", 249 + "hideFrom": { 250 + "legend": false, 251 + "tooltip": false, 252 + "viz": false 253 + }, 254 + "insertNulls": false, 255 + "lineInterpolation": "linear", 256 + "lineWidth": 1, 257 + "pointSize": 5, 258 + "scaleDistribution": { 259 + "type": "linear" 260 + }, 261 + "showPoints": "auto", 262 + "showValues": false, 263 + "spanNulls": false, 264 + "stacking": { 265 + "group": "A", 266 + "mode": "none" 267 + }, 268 + "thresholdsStyle": { 269 + "mode": "off" 270 + } 271 + }, 272 + "mappings": [], 273 + "thresholds": { 274 + "mode": "absolute", 275 + "steps": [ 276 + { 277 + "color": "green", 278 + "value": 0 279 + }, 280 + { 281 + "color": "red", 282 + "value": 1 283 + } 284 + ] 285 + }, 286 + "unit": "short" 287 + }, 288 + "overrides": [] 289 + }, 290 + "gridPos": { 291 + "h": 8, 292 + "w": 12, 293 + "x": 0, 294 + "y": 9 295 
+ }, 296 + "id": 3, 297 + "options": { 298 + "legend": { 299 + "calcs": ["sum"], 300 + "displayMode": "table", 301 + "placement": "bottom", 302 + "showLegend": true 303 + }, 304 + "tooltip": { 305 + "hideZeros": false, 306 + "mode": "multi", 307 + "sort": "none" 308 + } 309 + }, 310 + "pluginVersion": "12.3.1", 311 + "targets": [ 312 + { 313 + "expr": "rate(http_requests_total{service=~\"weaver-.*\", status=~\"5..\"}[5m])", 314 + "legendFormat": "{{service}} {{status}}", 315 + "refId": "A" 316 + } 317 + ], 318 + "title": "Error Rate (5xx)", 319 + "type": "timeseries" 320 + }, 321 + { 322 + "datasource": { 323 + "type": "prometheus", 324 + "uid": "${datasource}" 325 + }, 326 + "fieldConfig": { 327 + "defaults": { 328 + "color": { 329 + "mode": "thresholds" 330 + }, 331 + "mappings": [ 332 + { 333 + "options": { 334 + "0": { 335 + "color": "red", 336 + "text": "DOWN" 337 + }, 338 + "1": { 339 + "color": "green", 340 + "text": "UP" 341 + } 342 + }, 343 + "type": "value" 344 + } 345 + ], 346 + "thresholds": { 347 + "mode": "absolute", 348 + "steps": [ 349 + { 350 + "color": "red", 351 + "value": 0 352 + }, 353 + { 354 + "color": "green", 355 + "value": 1 356 + } 357 + ] 358 + } 359 + }, 360 + "overrides": [] 361 + }, 362 + "gridPos": { 363 + "h": 4, 364 + "w": 6, 365 + "x": 12, 366 + "y": 9 367 + }, 368 + "id": 4, 369 + "options": { 370 + "colorMode": "background", 371 + "graphMode": "none", 372 + "justifyMode": "auto", 373 + "orientation": "horizontal", 374 + "percentChangeColorMode": "standard", 375 + "reduceOptions": { 376 + "calcs": ["lastNotNull"], 377 + "fields": "", 378 + "values": false 379 + }, 380 + "showPercentChange": false, 381 + "textMode": "auto", 382 + "wideLayout": true 383 + }, 384 + "pluginVersion": "12.3.1", 385 + "targets": [ 386 + { 387 + "expr": "up{service=\"weaver-index\"}", 388 + "legendFormat": "weaver-index", 389 + "refId": "A" 390 + } 391 + ], 392 + "title": "Index Status", 393 + "type": "stat" 394 + }, 395 + { 396 + "datasource": { 397 + 
"type": "prometheus", 398 + "uid": "${datasource}" 399 + }, 400 + "fieldConfig": { 401 + "defaults": { 402 + "color": { 403 + "mode": "thresholds" 404 + }, 405 + "mappings": [ 406 + { 407 + "options": { 408 + "0": { 409 + "color": "red", 410 + "text": "DOWN" 411 + }, 412 + "1": { 413 + "color": "green", 414 + "text": "UP" 415 + } 416 + }, 417 + "type": "value" 418 + } 419 + ], 420 + "thresholds": { 421 + "mode": "absolute", 422 + "steps": [ 423 + { 424 + "color": "red", 425 + "value": 0 426 + }, 427 + { 428 + "color": "green", 429 + "value": 1 430 + } 431 + ] 432 + } 433 + }, 434 + "overrides": [] 435 + }, 436 + "gridPos": { 437 + "h": 4, 438 + "w": 6, 439 + "x": 18, 440 + "y": 9 441 + }, 442 + "id": 5, 443 + "options": { 444 + "colorMode": "background", 445 + "graphMode": "none", 446 + "justifyMode": "auto", 447 + "orientation": "horizontal", 448 + "percentChangeColorMode": "standard", 449 + "reduceOptions": { 450 + "calcs": ["lastNotNull"], 451 + "fields": "", 452 + "values": false 453 + }, 454 + "showPercentChange": false, 455 + "textMode": "auto", 456 + "wideLayout": true 457 + }, 458 + "pluginVersion": "12.3.1", 459 + "targets": [ 460 + { 461 + "expr": "up{service=\"weaver-app\"}", 462 + "legendFormat": "weaver-app", 463 + "refId": "A" 464 + } 465 + ], 466 + "title": "App Status", 467 + "type": "stat" 468 + }, 469 + { 470 + "collapsed": false, 471 + "gridPos": { 472 + "h": 1, 473 + "w": 24, 474 + "x": 0, 475 + "y": 17 476 + }, 477 + "id": 101, 478 + "panels": [], 479 + "title": "Caddy Proxy", 480 + "type": "row" 481 + }, 482 + { 483 + "datasource": { 484 + "type": "prometheus", 485 + "uid": "${datasource}" 486 + }, 487 + "fieldConfig": { 488 + "defaults": { 489 + "color": { 490 + "mode": "palette-classic" 491 + }, 492 + "custom": { 493 + "axisBorderShow": false, 494 + "axisCenteredZero": false, 495 + "axisColorMode": "text", 496 + "axisLabel": "", 497 + "axisPlacement": "auto", 498 + "barAlignment": 0, 499 + "barWidthFactor": 0.6, 500 + "drawStyle": "line", 
501 + "fillOpacity": 0, 502 + "gradientMode": "none", 503 + "hideFrom": { 504 + "legend": false, 505 + "tooltip": false, 506 + "viz": false 507 + }, 508 + "insertNulls": false, 509 + "lineInterpolation": "linear", 510 + "lineWidth": 1, 511 + "pointSize": 5, 512 + "scaleDistribution": { 513 + "type": "linear" 514 + }, 515 + "showPoints": "auto", 516 + "showValues": false, 517 + "spanNulls": false, 518 + "stacking": { 519 + "group": "A", 520 + "mode": "none" 521 + }, 522 + "thresholdsStyle": { 523 + "mode": "off" 524 + } 525 + }, 526 + "mappings": [], 527 + "thresholds": { 528 + "mode": "absolute", 529 + "steps": [ 530 + { 531 + "color": "green", 532 + "value": 0 533 + }, 534 + { 535 + "color": "red", 536 + "value": 80 537 + } 538 + ] 539 + }, 540 + "unit": "reqps" 541 + }, 542 + "overrides": [] 543 + }, 544 + "gridPos": { 545 + "h": 8, 546 + "w": 12, 547 + "x": 0, 548 + "y": 18 549 + }, 550 + "id": 6, 551 + "options": { 552 + "legend": { 553 + "calcs": ["mean", "max"], 554 + "displayMode": "table", 555 + "placement": "bottom", 556 + "showLegend": true 557 + }, 558 + "tooltip": { 559 + "hideZeros": false, 560 + "mode": "multi", 561 + "sort": "none" 562 + } 563 + }, 564 + "pluginVersion": "12.3.1", 565 + "targets": [ 566 + { 567 + "expr": "rate(caddy_admin_http_requests_total{service=\"weaver-caddy\"}[5m])", 568 + "legendFormat": "{{handler}} {{code}}", 569 + "refId": "A" 570 + } 571 + ], 572 + "title": "Caddy Request Rate", 573 + "type": "timeseries" 574 + }, 575 + { 576 + "datasource": { 577 + "type": "prometheus", 578 + "uid": "${datasource}" 579 + }, 580 + "fieldConfig": { 581 + "defaults": { 582 + "color": { 583 + "mode": "palette-classic" 584 + }, 585 + "custom": { 586 + "axisBorderShow": false, 587 + "axisCenteredZero": false, 588 + "axisColorMode": "text", 589 + "axisLabel": "", 590 + "axisPlacement": "auto", 591 + "barAlignment": 0, 592 + "barWidthFactor": 0.6, 593 + "drawStyle": "line", 594 + "fillOpacity": 0, 595 + "gradientMode": "none", 596 + "hideFrom": 
{ 597 + "legend": false, 598 + "tooltip": false, 599 + "viz": false 600 + }, 601 + "insertNulls": false, 602 + "lineInterpolation": "linear", 603 + "lineWidth": 1, 604 + "pointSize": 5, 605 + "scaleDistribution": { 606 + "type": "linear" 607 + }, 608 + "showPoints": "auto", 609 + "showValues": false, 610 + "spanNulls": false, 611 + "stacking": { 612 + "group": "A", 613 + "mode": "none" 614 + }, 615 + "thresholdsStyle": { 616 + "mode": "off" 617 + } 618 + }, 619 + "mappings": [], 620 + "thresholds": { 621 + "mode": "absolute", 622 + "steps": [ 623 + { 624 + "color": "green", 625 + "value": 0 626 + }, 627 + { 628 + "color": "red", 629 + "value": 80 630 + } 631 + ] 632 + }, 633 + "unit": "s" 634 + }, 635 + "overrides": [] 636 + }, 637 + "gridPos": { 638 + "h": 8, 639 + "w": 12, 640 + "x": 12, 641 + "y": 18 642 + }, 643 + "id": 7, 644 + "options": { 645 + "legend": { 646 + "calcs": ["mean", "max"], 647 + "displayMode": "table", 648 + "placement": "bottom", 649 + "showLegend": true 650 + }, 651 + "tooltip": { 652 + "hideZeros": false, 653 + "mode": "multi", 654 + "sort": "none" 655 + } 656 + }, 657 + "pluginVersion": "12.3.1", 658 + "targets": [ 659 + { 660 + "expr": "histogram_quantile(0.99, rate(caddy_http_request_duration_seconds_bucket{service=\"weaver-caddy\"}[5m]))", 661 + "legendFormat": "p99", 662 + "refId": "A" 663 + }, 664 + { 665 + "expr": "histogram_quantile(0.50, rate(caddy_http_request_duration_seconds_bucket{service=\"weaver-caddy\"}[5m]))", 666 + "legendFormat": "p50", 667 + "refId": "B" 668 + } 669 + ], 670 + "title": "Caddy Latency", 671 + "type": "timeseries" 672 + }, 673 + { 674 + "collapsed": false, 675 + "gridPos": { 676 + "h": 1, 677 + "w": 24, 678 + "x": 0, 679 + "y": 26 680 + }, 681 + "id": 102, 682 + "panels": [], 683 + "title": "Logs", 684 + "type": "row" 685 + }, 686 + { 687 + "datasource": { 688 + "type": "loki", 689 + "uid": "${loki}" 690 + }, 691 + "fieldConfig": { 692 + "defaults": {}, 693 + "overrides": [] 694 + }, 695 + "gridPos": { 696 
+ "h": 10, 697 + "w": 12, 698 + "x": 0, 699 + "y": 27 700 + }, 701 + "id": 8, 702 + "options": { 703 + "dedupStrategy": "none", 704 + "enableInfiniteScrolling": false, 705 + "enableLogDetails": true, 706 + "prettifyLogMessage": false, 707 + "showCommonLabels": false, 708 + "showControls": false, 709 + "showLabels": false, 710 + "showTime": true, 711 + "sortOrder": "Descending", 712 + "wrapLogMessage": true 713 + }, 714 + "pluginVersion": "12.3.1", 715 + "targets": [ 716 + { 717 + "expr": "{service_name=\"weaver-index\"} |= ``", 718 + "refId": "A" 719 + } 720 + ], 721 + "title": "Index Logs", 722 + "type": "logs" 723 + }, 724 + { 725 + "datasource": { 726 + "type": "loki", 727 + "uid": "${loki}" 728 + }, 729 + "fieldConfig": { 730 + "defaults": {}, 731 + "overrides": [] 732 + }, 733 + "gridPos": { 734 + "h": 10, 735 + "w": 12, 736 + "x": 12, 737 + "y": 27 738 + }, 739 + "id": 9, 740 + "options": { 741 + "dedupStrategy": "none", 742 + "enableInfiniteScrolling": false, 743 + "enableLogDetails": true, 744 + "prettifyLogMessage": false, 745 + "showCommonLabels": false, 746 + "showControls": false, 747 + "showLabels": false, 748 + "showTime": true, 749 + "sortOrder": "Descending", 750 + "wrapLogMessage": true 751 + }, 752 + "pluginVersion": "12.3.1", 753 + "targets": [ 754 + { 755 + "direction": "backward", 756 + "editorMode": "code", 757 + "expr": "{service=\"weaver-app\"} != `dioxus_core` or `hyper_util` or `dioxus_signals` or `reqwest` or `axum`", 758 + "queryType": "range", 759 + "refId": "A" 760 + } 761 + ], 762 + "title": "App Logs", 763 + "type": "logs" 764 + }, 765 + { 766 + "datasource": { 767 + "type": "loki", 768 + "uid": "${loki}" 769 + }, 770 + "fieldConfig": { 771 + "defaults": { 772 + "color": { 773 + "mode": "palette-classic" 774 + }, 775 + "custom": { 776 + "axisBorderShow": false, 777 + "axisCenteredZero": false, 778 + "axisColorMode": "text", 779 + "axisLabel": "", 780 + "axisPlacement": "auto", 781 + "barAlignment": 0, 782 + "barWidthFactor": 0.6, 783 
+ "drawStyle": "line", 784 + "fillOpacity": 0, 785 + "gradientMode": "none", 786 + "hideFrom": { 787 + "legend": false, 788 + "tooltip": false, 789 + "viz": false 790 + }, 791 + "insertNulls": false, 792 + "lineInterpolation": "linear", 793 + "lineWidth": 1, 794 + "pointSize": 5, 795 + "scaleDistribution": { 796 + "type": "linear" 797 + }, 798 + "showPoints": "auto", 799 + "showValues": false, 800 + "spanNulls": false, 801 + "stacking": { 802 + "group": "A", 803 + "mode": "none" 804 + }, 805 + "thresholdsStyle": { 806 + "mode": "off" 807 + } 808 + }, 809 + "mappings": [], 810 + "thresholds": { 811 + "mode": "absolute", 812 + "steps": [ 813 + { 814 + "color": "green", 815 + "value": 0 816 + }, 817 + { 818 + "color": "red", 819 + "value": 80 820 + } 821 + ] 822 + } 823 + }, 824 + "overrides": [] 825 + }, 826 + "gridPos": { 827 + "h": 6, 828 + "w": 24, 829 + "x": 0, 830 + "y": 37 831 + }, 832 + "id": 10, 833 + "options": { 834 + "legend": { 835 + "calcs": [], 836 + "displayMode": "list", 837 + "placement": "bottom", 838 + "showLegend": true 839 + }, 840 + "tooltip": { 841 + "hideZeros": false, 842 + "mode": "multi", 843 + "sort": "none" 844 + } 845 + }, 846 + "pluginVersion": "12.3.1", 847 + "targets": [ 848 + { 849 + "expr": "sum by (service_name) (count_over_time({service_name=~\"weaver-.*\"} | level=~\"error|ERROR|err\" [1m]))", 850 + "legendFormat": "{{service_name}}", 851 + "refId": "A" 852 + } 853 + ], 854 + "title": "Error Log Rate", 855 + "type": "timeseries" 856 + }, 857 + { 858 + "datasource": { 859 + "type": "loki", 860 + "uid": "${loki}" 861 + }, 862 + "fieldConfig": { 863 + "defaults": {}, 864 + "overrides": [] 865 + }, 866 + "gridPos": { 867 + "h": 8, 868 + "w": 24, 869 + "x": 0, 870 + "y": 43 871 + }, 872 + "id": 11, 873 + "options": { 874 + "dedupStrategy": "none", 875 + "enableInfiniteScrolling": false, 876 + "enableLogDetails": true, 877 + "prettifyLogMessage": false, 878 + "showCommonLabels": false, 879 + "showControls": false, 880 + "showLabels": 
true, 881 + "showTime": true, 882 + "sortOrder": "Descending", 883 + "wrapLogMessage": true 884 + }, 885 + "pluginVersion": "12.3.1", 886 + "targets": [ 887 + { 888 + "direction": "backward", 889 + "editorMode": "code", 890 + "expr": "{container_name=~\"weaver-clickhouse|weaver-caddy|weaver-tap\"} |= ``", 891 + "queryType": "range", 892 + "refId": "A" 893 + } 894 + ], 895 + "title": "Infrastructure Logs", 896 + "type": "logs" 897 + } 898 + ], 899 + "preload": false, 900 + "schemaVersion": 42, 901 + "tags": ["weaver"], 902 + "templating": { 903 + "list": [ 904 + { 905 + "current": { 906 + "text": "Prometheus", 907 + "value": "PBFA97CFB590B2093" 908 + }, 909 + "includeAll": false, 910 + "label": "Metrics", 911 + "name": "datasource", 912 + "options": [], 913 + "query": "prometheus", 914 + "refresh": 1, 915 + "regex": "", 916 + "type": "datasource" 917 + }, 918 + { 919 + "current": { 920 + "text": "Loki", 921 + "value": "P8E80F9AEF21F6940" 922 + }, 923 + "includeAll": false, 924 + "label": "Logs", 925 + "name": "loki", 926 + "options": [], 927 + "query": "loki", 928 + "refresh": 1, 929 + "regex": "", 930 + "type": "datasource" 931 + } 932 + ] 933 + }, 934 + "time": { 935 + "from": "now-1h", 936 + "to": "now" 937 + }, 938 + "timepicker": {}, 939 + "timezone": "browser", 940 + "title": "Weaver Overview", 941 + "uid": "weaver-overview", 942 + "version": 9 943 + }
+412
infra/grafana/dashboards/weaver-tap.json
···
··· 1 + { 2 + "annotations": { "list": [] }, 3 + "editable": true, 4 + "fiscalYearStartMonth": 0, 5 + "graphTooltip": 1, 6 + "links": [], 7 + "panels": [ 8 + { 9 + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, 10 + "id": 100, 11 + "title": "Firehose", 12 + "type": "row" 13 + }, 14 + { 15 + "datasource": { "type": "prometheus", "uid": "${datasource}" }, 16 + "fieldConfig": { 17 + "defaults": { 18 + "color": { "mode": "palette-classic" }, 19 + "unit": "short" 20 + } 21 + }, 22 + "gridPos": { "h": 6, "w": 8, "x": 0, "y": 1 }, 23 + "id": 1, 24 + "options": { 25 + "legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom" }, 26 + "tooltip": { "mode": "multi" } 27 + }, 28 + "targets": [ 29 + { 30 + "expr": "rate(tap_firehose_events_received_total{service=\"weaver-tap\"}[5m])", 31 + "legendFormat": "Received", 32 + "refId": "A" 33 + }, 34 + { 35 + "expr": "rate(tap_firehose_events_processed_total{service=\"weaver-tap\"}[5m])", 36 + "legendFormat": "Processed", 37 + "refId": "B" 38 + }, 39 + { 40 + "expr": "rate(tap_firehose_events_skipped_total{service=\"weaver-tap\"}[5m])", 41 + "legendFormat": "Skipped", 42 + "refId": "C" 43 + } 44 + ], 45 + "title": "Firehose Event Rate", 46 + "type": "timeseries" 47 + }, 48 + { 49 + "datasource": { "type": "prometheus", "uid": "${datasource}" }, 50 + "fieldConfig": { 51 + "defaults": { 52 + "color": { "mode": "thresholds" }, 53 + "thresholds": { 54 + "mode": "absolute", 55 + "steps": [ 56 + { "color": "green", "value": null } 57 + ] 58 + }, 59 + "unit": "none" 60 + } 61 + }, 62 + "gridPos": { "h": 6, "w": 4, "x": 8, "y": 1 }, 63 + "id": 2, 64 + "options": { 65 + "colorMode": "value", 66 + "graphMode": "area", 67 + "justifyMode": "auto", 68 + "orientation": "auto", 69 + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false } 70 + }, 71 + "targets": [ 72 + { 73 + "expr": "tap_firehose_last_seq{service=\"weaver-tap\"}", 74 + "legendFormat": "Last Seq", 75 + "refId": "A" 76 + } 77 + ], 78 + 
"title": "Last Sequence", 79 + "type": "stat" 80 + }, 81 + { 82 + "datasource": { "type": "prometheus", "uid": "${datasource}" }, 83 + "fieldConfig": { 84 + "defaults": { 85 + "color": { "mode": "thresholds" }, 86 + "thresholds": { 87 + "mode": "absolute", 88 + "steps": [ 89 + { "color": "green", "value": null }, 90 + { "color": "yellow", "value": 10000 }, 91 + { "color": "red", "value": 50000 } 92 + ] 93 + }, 94 + "unit": "short" 95 + } 96 + }, 97 + "gridPos": { "h": 6, "w": 4, "x": 12, "y": 1 }, 98 + "id": 3, 99 + "options": { 100 + "colorMode": "value", 101 + "graphMode": "area", 102 + "justifyMode": "auto", 103 + "orientation": "auto", 104 + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false } 105 + }, 106 + "targets": [ 107 + { 108 + "expr": "tap_event_cache_size{service=\"weaver-tap\"}", 109 + "legendFormat": "Cache Size", 110 + "refId": "A" 111 + } 112 + ], 113 + "title": "Event Cache Size", 114 + "type": "stat" 115 + }, 116 + { 117 + "datasource": { "type": "prometheus", "uid": "${datasource}" }, 118 + "fieldConfig": { 119 + "defaults": { 120 + "color": { "mode": "palette-classic" }, 121 + "unit": "short" 122 + } 123 + }, 124 + "gridPos": { "h": 6, "w": 8, "x": 16, "y": 1 }, 125 + "id": 4, 126 + "options": { 127 + "legend": { "calcs": ["sum"], "displayMode": "table", "placement": "bottom" }, 128 + "tooltip": { "mode": "multi" } 129 + }, 130 + "targets": [ 131 + { 132 + "expr": "increase(tap_firehose_events_received_total{service=\"weaver-tap\"}[1h])", 133 + "legendFormat": "Received", 134 + "refId": "A" 135 + }, 136 + { 137 + "expr": "increase(tap_firehose_events_processed_total{service=\"weaver-tap\"}[1h])", 138 + "legendFormat": "Processed", 139 + "refId": "B" 140 + } 141 + ], 142 + "title": "Events (1h)", 143 + "type": "timeseries" 144 + }, 145 + { 146 + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 7 }, 147 + "id": 101, 148 + "title": "Resyncs", 149 + "type": "row" 150 + }, 151 + { 152 + "datasource": { "type": "prometheus", "uid": 
"${datasource}" }, 153 + "fieldConfig": { 154 + "defaults": { 155 + "color": { "mode": "palette-classic" }, 156 + "unit": "short" 157 + } 158 + }, 159 + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 8 }, 160 + "id": 5, 161 + "options": { 162 + "legend": { "calcs": ["sum"], "displayMode": "table", "placement": "bottom" }, 163 + "tooltip": { "mode": "multi" } 164 + }, 165 + "targets": [ 166 + { 167 + "expr": "rate(tap_resyncs_started_total{service=\"weaver-tap\"}[5m])", 168 + "legendFormat": "Started", 169 + "refId": "A" 170 + }, 171 + { 172 + "expr": "rate(tap_resyncs_completed_total{service=\"weaver-tap\"}[5m])", 173 + "legendFormat": "Completed", 174 + "refId": "B" 175 + }, 176 + { 177 + "expr": "rate(tap_resyncs_failed_total{service=\"weaver-tap\"}[5m])", 178 + "legendFormat": "Failed", 179 + "refId": "C" 180 + } 181 + ], 182 + "title": "Resync Rate", 183 + "type": "timeseries" 184 + }, 185 + { 186 + "datasource": { "type": "prometheus", "uid": "${datasource}" }, 187 + "fieldConfig": { 188 + "defaults": { 189 + "color": { "mode": "palette-classic" }, 190 + "unit": "s" 191 + } 192 + }, 193 + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 8 }, 194 + "id": 6, 195 + "options": { 196 + "legend": { "calcs": ["mean", "max", "p99"], "displayMode": "table", "placement": "bottom" }, 197 + "tooltip": { "mode": "multi" } 198 + }, 199 + "targets": [ 200 + { 201 + "expr": "histogram_quantile(0.99, rate(tap_resync_duration_seconds_bucket{service=\"weaver-tap\"}[5m]))", 202 + "legendFormat": "p99", 203 + "refId": "A" 204 + }, 205 + { 206 + "expr": "histogram_quantile(0.50, rate(tap_resync_duration_seconds_bucket{service=\"weaver-tap\"}[5m]))", 207 + "legendFormat": "p50", 208 + "refId": "B" 209 + } 210 + ], 211 + "title": "Resync Duration", 212 + "type": "timeseries" 213 + }, 214 + { 215 + "datasource": { "type": "prometheus", "uid": "${datasource}" }, 216 + "fieldConfig": { 217 + "defaults": { 218 + "color": { "mode": "thresholds" }, 219 + "thresholds": { 220 + "mode": "absolute", 
221 + "steps": [ 222 + { "color": "green", "value": null } 223 + ] 224 + }, 225 + "unit": "short" 226 + } 227 + }, 228 + "gridPos": { "h": 4, "w": 4, "x": 0, "y": 16 }, 229 + "id": 7, 230 + "options": { 231 + "colorMode": "value", 232 + "graphMode": "none", 233 + "justifyMode": "auto", 234 + "orientation": "auto", 235 + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false } 236 + }, 237 + "targets": [ 238 + { 239 + "expr": "tap_resyncs_completed_total{service=\"weaver-tap\"}", 240 + "legendFormat": "Completed", 241 + "refId": "A" 242 + } 243 + ], 244 + "title": "Total Completed", 245 + "type": "stat" 246 + }, 247 + { 248 + "datasource": { "type": "prometheus", "uid": "${datasource}" }, 249 + "fieldConfig": { 250 + "defaults": { 251 + "color": { "mode": "thresholds" }, 252 + "thresholds": { 253 + "mode": "absolute", 254 + "steps": [ 255 + { "color": "green", "value": null }, 256 + { "color": "red", "value": 1 } 257 + ] 258 + }, 259 + "unit": "short" 260 + } 261 + }, 262 + "gridPos": { "h": 4, "w": 4, "x": 4, "y": 16 }, 263 + "id": 8, 264 + "options": { 265 + "colorMode": "value", 266 + "graphMode": "none", 267 + "justifyMode": "auto", 268 + "orientation": "auto", 269 + "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false } 270 + }, 271 + "targets": [ 272 + { 273 + "expr": "tap_resyncs_failed_total{service=\"weaver-tap\"}", 274 + "legendFormat": "Failed", 275 + "refId": "A" 276 + } 277 + ], 278 + "title": "Total Failed", 279 + "type": "stat" 280 + }, 281 + { 282 + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 20 }, 283 + "id": 102, 284 + "title": "Event Delivery", 285 + "type": "row" 286 + }, 287 + { 288 + "datasource": { "type": "prometheus", "uid": "${datasource}" }, 289 + "fieldConfig": { 290 + "defaults": { 291 + "color": { "mode": "palette-classic" }, 292 + "unit": "short" 293 + } 294 + }, 295 + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 21 }, 296 + "id": 9, 297 + "options": { 298 + "legend": { "calcs": ["mean", "max"], 
"displayMode": "table", "placement": "bottom" }, 299 + "tooltip": { "mode": "multi" } 300 + }, 301 + "targets": [ 302 + { 303 + "expr": "rate(tap_events_delivered_total{service=\"weaver-tap\"}[5m])", 304 + "legendFormat": "Delivered", 305 + "refId": "A" 306 + }, 307 + { 308 + "expr": "rate(tap_events_acked_total{service=\"weaver-tap\"}[5m])", 309 + "legendFormat": "Acked", 310 + "refId": "B" 311 + } 312 + ], 313 + "title": "Event Delivery Rate", 314 + "type": "timeseries" 315 + }, 316 + { 317 + "datasource": { "type": "prometheus", "uid": "${datasource}" }, 318 + "fieldConfig": { 319 + "defaults": { 320 + "color": { "mode": "palette-classic" }, 321 + "unit": "short" 322 + } 323 + }, 324 + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 21 }, 325 + "id": 10, 326 + "options": { 327 + "legend": { "calcs": ["mean"], "displayMode": "table", "placement": "bottom" }, 328 + "tooltip": { "mode": "multi" } 329 + }, 330 + "targets": [ 331 + { 332 + "expr": "rate(tap_crawler_repos_discovered_total{service=\"weaver-tap\"}[5m])", 333 + "legendFormat": "Repos Discovered", 334 + "refId": "A" 335 + } 336 + ], 337 + "title": "Crawler Discovery Rate", 338 + "type": "timeseries" 339 + }, 340 + { 341 + "gridPos": { "h": 1, "w": 24, "x": 0, "y": 29 }, 342 + "id": 103, 343 + "title": "Logs", 344 + "type": "row" 345 + }, 346 + { 347 + "datasource": { "type": "loki", "uid": "${loki}" }, 348 + "gridPos": { "h": 10, "w": 24, "x": 0, "y": 30 }, 349 + "id": 11, 350 + "options": { 351 + "dedupStrategy": "none", 352 + "enableLogDetails": true, 353 + "prettifyLogMessage": false, 354 + "showCommonLabels": false, 355 + "showLabels": true, 356 + "showTime": true, 357 + "sortOrder": "Descending", 358 + "wrapLogMessage": true 359 + }, 360 + "targets": [ 361 + { 362 + "expr": "{container_name=\"weaver-tap\"} |= ``", 363 + "refId": "A" 364 + } 365 + ], 366 + "title": "Tap Logs", 367 + "type": "logs" 368 + } 369 + ], 370 + "schemaVersion": 39, 371 + "tags": ["weaver", "tap", "atproto"], 372 + "templating": { 
373 + "list": [ 374 + { 375 + "current": { "selected": false, "text": "Prometheus", "value": "Prometheus" }, 376 + "hide": 0, 377 + "includeAll": false, 378 + "label": "Metrics", 379 + "multi": false, 380 + "name": "datasource", 381 + "options": [], 382 + "query": "prometheus", 383 + "queryValue": "", 384 + "refresh": 1, 385 + "regex": "", 386 + "skipUrlSync": false, 387 + "type": "datasource" 388 + }, 389 + { 390 + "current": { "selected": false, "text": "Loki", "value": "Loki" }, 391 + "hide": 0, 392 + "includeAll": false, 393 + "label": "Logs", 394 + "multi": false, 395 + "name": "loki", 396 + "options": [], 397 + "query": "loki", 398 + "queryValue": "", 399 + "refresh": 1, 400 + "regex": "", 401 + "skipUrlSync": false, 402 + "type": "datasource" 403 + } 404 + ] 405 + }, 406 + "time": { "from": "now-1h", "to": "now" }, 407 + "timepicker": {}, 408 + "timezone": "browser", 409 + "title": "Weaver Tap", 410 + "uid": "weaver-tap", 411 + "version": 1 412 + }
+44
infra/promtail/config.yml
···
··· 1 + server: 2 + http_listen_port: 9080 3 + grpc_listen_port: 0 4 + 5 + positions: 6 + filename: /tmp/positions.yaml 7 + 8 + clients: 9 + - url: ${LOKI_URL}/loki/api/v1/push 10 + 11 + scrape_configs: 12 + - job_name: docker 13 + docker_sd_configs: 14 + - host: unix:///var/run/docker.sock 15 + refresh_interval: 5s 16 + relabel_configs: 17 + # Only scrape weaver containers 18 + - source_labels: [__meta_docker_container_name] 19 + regex: "/(weaver-.+)" 20 + action: keep 21 + # Set container_name label (matches dashboard queries) 22 + - source_labels: [__meta_docker_container_name] 23 + regex: "/(.+)" 24 + target_label: container_name 25 + # Add instance label 26 + - target_label: instance 27 + replacement: "booskie-box" 28 + pipeline_stages: 29 + # Parse JSON logs if present 30 + - json: 31 + expressions: 32 + level: level 33 + msg: msg 34 + timestamp: timestamp 35 + # Use extracted level if available 36 + - labels: 37 + level: 38 + # Timestamp from log if available 39 + - timestamp: 40 + source: timestamp 41 + format: RFC3339Nano 42 + fallback_formats: 43 + - RFC3339 44 + - UnixMs