tangled
alpha
login
or
join now
nonbinary.computer
/
weaver
atproto blogging
24
fork
atom
overview
issues
2
pulls
pipelines
observability stuff
Orual
3 weeks ago
4705bfcc
4453cdd3
+2378
-71
16 changed files
expand all
collapse all
unified
split
Cargo.lock
crates
weaver-app
Cargo.toml
src
components
app_link.rs
main.rs
views
subdomain_navbar.rs
weaver-common
Cargo.toml
src
telemetry.rs
weaver-index
src
server.rs
docker-compose.yml
infra
caddy
Caddyfile
clickhouse
prometheus.xml
grafana
dashboards
weaver-clickhouse.json
weaver-infra.json
weaver-overview.json
weaver-tap.json
promtail
config.yml
+2
Cargo.lock
···
12049
12049
"tiny-skia",
12050
12050
"tokio",
12051
12051
"tower",
12052
12052
+
"tower-http",
12052
12053
"tracing",
12053
12054
"tracing-subscriber",
12054
12055
"tracing-wasm",
···
12092
12093
name = "weaver-common"
12093
12094
version = "0.1.0"
12094
12095
dependencies = [
12096
12096
+
"axum",
12095
12097
"blake3",
12096
12098
"chrono",
12097
12099
"futures-util",
+2
-1
crates/weaver-app/Cargo.toml
···
39
39
web = ["dioxus/web", "dioxus-primitives/web"]
40
40
desktop = ["dioxus/desktop"]
41
41
mobile = ["dioxus/mobile"]
42
42
-
server = [ "dioxus/server", "dep:jacquard-axum", "dep:axum", "dep:axum-extra", "dep:tower", "dep:resvg", "dep:usvg", "dep:tiny-skia", "dep:textwrap", "dep:askama", "dep:fontdb", "dep:lightningcss"]
42
42
+
server = [ "dioxus/server", "dep:jacquard-axum", "dep:axum", "dep:axum-extra", "dep:tower", "dep:tower-http", "dep:resvg", "dep:usvg", "dep:tiny-skia", "dep:textwrap", "dep:askama", "dep:fontdb", "dep:lightningcss", "weaver-common/telemetry"]
43
43
collab-worker = ["weaver-common/iroh"]
44
44
45
45
···
63
63
axum = { version = "0.8.6", optional = true }
64
64
axum-extra = { version = "0.10", optional = true, features = ["typed-header"] }
65
65
tower = { version = "0.5", optional = true }
66
66
+
tower-http = { version = "0.6", optional = true, features = ["trace"] }
66
67
mime-sniffer = {version = "^0.1"}
67
68
chrono = { version = "0.4" }
68
69
serde = { version = "1.0" }
-1
crates/weaver-app/src/components/app_link.rs
···
75
75
#[component]
76
76
pub fn AppLink(props: AppLinkProps) -> Element {
77
77
let link_mode = use_context::<LinkMode>();
78
78
-
tracing::info!(?link_mode, "AppLink: reading LinkMode context");
79
78
let class = props.class.clone().unwrap_or_default();
80
79
81
80
match link_mode {
+85
-50
crates/weaver-app/src/main.rs
···
45
45
// Filter out noisy crates
46
46
// Use weaver_app=trace for detailed editor debugging
47
47
let filter = EnvFilter::new(
48
48
-
"debug,weaver_app=trace,loro_internal=warn,jacquard_identity=info,jacquard_common=info,iroh=info",
48
48
+
"debug,weaver_app=trace,loro_internal=warn,jacquard_identity=info,jacquard_common=info,iroh=info,reqwest=warn",
49
49
);
50
50
51
51
let reg = Registry::default()
···
56
56
let _ = set_global_default(reg);
57
57
}
58
58
59
59
+
// Initialize telemetry (metrics + tracing) before server starts.
60
60
+
// The Loki task itself is spawned later, inside dioxus::serve, where a tokio runtime exists.
61
61
+
// Wrapped in Arc<Mutex<Option<_>>> so the FnMut closure can clone the handle and take() the task on its first call; later calls find None and spawn nothing.
62
62
+
#[cfg(feature = "server")]
63
63
+
let loki_task = {
64
64
+
use weaver_common::telemetry::{self, TelemetryConfig};
65
65
+
let config = TelemetryConfig::from_env("weaver-app");
66
66
+
std::sync::Arc::new(std::sync::Mutex::new(telemetry::init_sync(config)))
67
67
+
};
68
68
+
59
69
#[cfg(feature = "server")]
60
70
std::panic::set_hook(Box::new(|panic_info| {
61
71
tracing::error!("PANIC: {:?}", panic_info);
···
63
73
64
74
// Run `serve()` on the server only
65
75
#[cfg(feature = "server")]
66
66
-
dioxus::serve(|| async move {
67
67
-
#[cfg(feature = "fullstack-server")]
68
68
-
use axum::middleware;
69
69
-
use axum::middleware::Next;
70
70
-
use axum::{Router, body::Body, extract::Request, response::Response, routing::get};
71
71
-
use axum_extra::extract::Host;
72
72
-
use jacquard::oauth::{client::OAuthClient, session::ClientData};
73
73
-
use std::convert::Infallible;
74
74
-
use weaver_app::auth::AuthStore;
75
75
-
use weaver_app::blobcache::BlobCache;
76
76
+
dioxus::serve({
77
77
+
let loki_task = loki_task.clone();
78
78
+
move || {
79
79
+
let loki_task = loki_task.clone();
80
80
+
async move {
81
81
+
#[cfg(feature = "fullstack-server")]
82
82
+
use axum::middleware;
83
83
+
use axum::middleware::Next;
84
84
+
use axum::{
85
85
+
Router, body::Body, extract::Request, response::Response, routing::get,
86
86
+
};
87
87
+
use axum_extra::extract::Host;
88
88
+
use jacquard::oauth::{client::OAuthClient, session::ClientData};
89
89
+
use std::convert::Infallible;
90
90
+
use weaver_app::auth::AuthStore;
91
91
+
use weaver_app::blobcache::BlobCache;
92
92
+
use weaver_common::telemetry;
76
93
77
77
-
#[cfg(not(feature = "fullstack-server"))]
78
78
-
let router = { Router::new().merge(dioxus::server::router(App)) };
94
94
+
// Spawn the Loki background task now that we're inside the tokio runtime.
95
95
+
if let Some(task) = loki_task.lock().unwrap().take() {
96
96
+
telemetry::spawn_loki_task(task);
97
97
+
}
79
98
80
80
-
#[cfg(feature = "fullstack-server")]
81
81
-
let router = {
82
82
-
let fetcher = Arc::new(fetch::Fetcher::new(OAuthClient::new(
83
83
-
AuthStore::new(),
84
84
-
ClientData::new_public(CONFIG.oauth.clone()),
85
85
-
)));
99
99
+
#[cfg(not(feature = "fullstack-server"))]
100
100
+
let router = {
101
101
+
Router::new()
102
102
+
.merge(dioxus::server::router(App))
103
103
+
.layer(middleware::from_fn(telemetry::http_metrics))
104
104
+
.layer(tower_http::trace::TraceLayer::new_for_http())
105
105
+
};
86
106
87
87
-
let blob_cache = Arc::new(BlobCache::new(fetcher.clone()));
88
88
-
axum::Router::new()
89
89
-
.route("/favicon.ico", get(weaver_app::favicon))
90
90
-
.serve_dioxus_application(ServeConfig::builder(), App)
91
91
-
// Host context resolution.
92
92
-
.layer(middleware::from_fn({
93
93
-
let fetcher = fetcher.clone();
94
94
-
move |req: Request, next: Next| {
95
95
-
let fetcher = fetcher.clone();
96
96
-
async move {
97
97
-
weaver_app::middleware::host_context_middleware(req, next, fetcher)
98
98
-
.await
99
99
-
}
100
100
-
}
101
101
-
}))
102
102
-
// Insert fetcher and blob cache into extensions.
103
103
-
.layer(middleware::from_fn({
104
104
-
let blob_cache = blob_cache.clone();
105
105
-
let fetcher = fetcher.clone();
106
106
-
move |mut req: Request, next: Next| {
107
107
-
let blob_cache = blob_cache.clone();
108
108
-
let fetcher = fetcher.clone();
109
109
-
async move {
110
110
-
req.extensions_mut().insert(blob_cache);
111
111
-
req.extensions_mut().insert(fetcher);
112
112
-
Ok::<_, Infallible>(next.run(req).await)
113
113
-
}
114
114
-
}
115
115
-
}))
116
116
-
};
117
117
-
Ok(router)
107
107
+
#[cfg(feature = "fullstack-server")]
108
108
+
let router = {
109
109
+
let fetcher = Arc::new(fetch::Fetcher::new(OAuthClient::new(
110
110
+
AuthStore::new(),
111
111
+
ClientData::new_public(CONFIG.oauth.clone()),
112
112
+
)));
113
113
+
114
114
+
let blob_cache = Arc::new(BlobCache::new(fetcher.clone()));
115
115
+
axum::Router::new()
116
116
+
.route("/favicon.ico", get(weaver_app::favicon))
117
117
+
.route("/metrics", get(|| async { telemetry::render() }))
118
118
+
.serve_dioxus_application(ServeConfig::builder(), App)
119
119
+
// Host context resolution.
120
120
+
.layer(middleware::from_fn({
121
121
+
let fetcher = fetcher.clone();
122
122
+
move |req: Request, next: Next| {
123
123
+
let fetcher = fetcher.clone();
124
124
+
async move {
125
125
+
weaver_app::middleware::host_context_middleware(
126
126
+
req, next, fetcher,
127
127
+
)
128
128
+
.await
129
129
+
}
130
130
+
}
131
131
+
}))
132
132
+
// Insert fetcher and blob cache into extensions.
133
133
+
.layer(middleware::from_fn({
134
134
+
let blob_cache = blob_cache.clone();
135
135
+
let fetcher = fetcher.clone();
136
136
+
move |mut req: Request, next: Next| {
137
137
+
let blob_cache = blob_cache.clone();
138
138
+
let fetcher = fetcher.clone();
139
139
+
async move {
140
140
+
req.extensions_mut().insert(blob_cache);
141
141
+
req.extensions_mut().insert(fetcher);
142
142
+
Ok::<_, Infallible>(next.run(req).await)
143
143
+
}
144
144
+
}
145
145
+
}))
146
146
+
// HTTP metrics (request count, duration)
147
147
+
.layer(middleware::from_fn(telemetry::http_metrics))
148
148
+
.layer(tower_http::trace::TraceLayer::new_for_http())
149
149
+
};
150
150
+
Ok(router)
151
151
+
}
152
152
+
}
118
153
});
119
154
120
155
#[cfg(not(feature = "server"))]
+3
-2
crates/weaver-app/src/views/subdomain_navbar.rs
···
98
98
}
99
99
}
100
100
}
101
101
-
// Author profile link
101
101
+
// Author profile link - temporarily disabled to debug SSR hang
102
102
nav { class: "nav-tools",
103
103
-
AuthorProfileLink { ident: ctx.owner.clone() }
103
103
+
// AuthorProfileLink { ident: ctx.owner.clone() }
104
104
+
"DEBUG: navbar without author link"
104
105
}
105
106
106
107
// Auth button
+2
-1
crates/weaver-common/Cargo.toml
···
10
10
native = ["jacquard/dns"]
11
11
use-index = []
12
12
iroh = ["dep:iroh", "dep:iroh-gossip", "dep:iroh-tickets"]
13
13
-
telemetry = ["dep:metrics", "dep:metrics-exporter-prometheus", "dep:tracing-subscriber", "dep:tracing-loki"]
13
13
+
telemetry = ["dep:metrics", "dep:metrics-exporter-prometheus", "dep:tracing-subscriber", "dep:tracing-loki", "dep:axum"]
14
14
cache = ["dep:mini-moka-wasm"]
15
15
perf = []
16
16
···
51
51
metrics-exporter-prometheus = { version = "0.17.2", optional = true }
52
52
tracing-subscriber = { version = "0.3", features = ["env-filter", "fmt"], optional = true }
53
53
tracing-loki = { version = "0.2", optional = true }
54
54
+
axum = { version = "0.8", optional = true }
54
55
55
56
getrandom = { version = "0.3", features = [] }
56
57
ring = { version = "0.17", default-features = false }
+102
-10
crates/weaver-common/src/telemetry.rs
···
67
67
}
68
68
}
69
69
70
70
+
/// Opaque handle for the Loki background task.
71
71
+
pub struct LokiTask(tracing_loki::BackgroundTask);
72
72
+
70
73
/// Initialize telemetry (metrics + tracing).
71
74
///
72
75
/// Call once at application startup. If `LOKI_URL` is set, spawns a background
···
75
78
// Initialize prometheus metrics
76
79
init_metrics();
77
80
78
78
-
// Initialize tracing
79
79
-
init_tracing(config).await;
81
81
+
// Initialize tracing subscriber
82
82
+
if let Some(task) = init_tracing(config) {
83
83
+
// Spawn the loki background task
84
84
+
tokio::spawn(task.0);
85
85
+
}
86
86
+
}
87
87
+
88
88
+
/// Initialize telemetry without spawning the Loki task.
89
89
+
///
90
90
+
/// Use this when you need to set up tracing before a tokio runtime is available.
91
91
+
/// Returns the Loki task if configured - caller must spawn it later with `spawn_loki_task`.
92
92
+
pub fn init_sync(config: TelemetryConfig) -> Option<LokiTask> {
93
93
+
init_metrics();
94
94
+
init_tracing(config)
95
95
+
}
96
96
+
97
97
+
/// Spawn the Loki background task.
98
98
+
///
99
99
+
/// Call this from within a Tokio runtime, after `init_sync`; `tokio::spawn` panics if no runtime is running.
100
100
+
pub fn spawn_loki_task(task: LokiTask) {
101
101
+
tokio::spawn(task.0);
80
102
}
81
103
82
104
/// Initialize just the prometheus metrics recorder.
83
105
pub fn init_metrics() -> &'static PrometheusHandle {
84
106
PROMETHEUS_HANDLE.get_or_init(|| {
107
107
+
// HTTP request duration buckets (in seconds)
108
108
+
let http_buckets = vec![
109
109
+
0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0,
110
110
+
];
111
111
+
85
112
PrometheusBuilder::new()
113
113
+
.set_buckets_for_metric(
114
114
+
metrics_exporter_prometheus::Matcher::Prefix("http_request_duration".to_string()),
115
115
+
&http_buckets,
116
116
+
)
117
117
+
.expect("failed to set histogram buckets")
86
118
.install_recorder()
87
119
.expect("failed to install prometheus recorder")
88
120
})
89
121
}
90
122
91
123
/// Initialize tracing with console + optional Loki layers.
92
92
-
async fn init_tracing(config: TelemetryConfig) {
124
124
+
///
125
125
+
/// Returns the Loki background task if Loki is configured.
126
126
+
fn init_tracing(config: TelemetryConfig) -> Option<LokiTask> {
93
127
let env_filter = EnvFilter::try_from_default_env().unwrap_or_else(|_| {
94
94
-
EnvFilter::new(format!(
95
95
-
"{}",
96
96
-
config.console_level.as_str().to_lowercase()
97
97
-
))
128
128
+
EnvFilter::new(format!("{}", config.console_level.as_str().to_lowercase()))
98
129
});
99
130
100
131
// Pretty console layer for human-readable stdout
···
121
152
.with(loki_layer)
122
153
.init();
123
154
124
124
-
// Spawn the background task that pushes to Loki
125
125
-
tokio::spawn(loki_task);
126
126
-
127
155
tracing::info!(
128
156
service = %config.service_name,
129
157
loki_url = %loki_url,
130
158
"telemetry initialized with loki"
131
159
);
160
160
+
161
161
+
Some(LokiTask(loki_task))
132
162
}
133
163
Err(e) => {
134
164
// Invalid URL - fall back to console only
···
139
169
loki_url = %loki_url,
140
170
"invalid LOKI_URL, falling back to console only"
141
171
);
172
172
+
None
142
173
}
143
174
}
144
175
} else {
···
149
180
service = %config.service_name,
150
181
"telemetry initialized (console only, set LOKI_URL to enable loki)"
151
182
);
183
183
+
None
152
184
}
153
185
}
154
186
···
168
200
169
201
// Re-export the metrics crate for convenience
170
202
pub use metrics::{counter, gauge, histogram};
203
203
+
204
204
+
/// HTTP metrics middleware for axum.
205
205
+
///
206
206
+
/// Records `http_requests_total` counter and `http_request_duration_seconds` histogram.
207
207
+
/// Use with `axum::middleware::from_fn`.
208
208
+
///
209
209
+
/// # Example
210
210
+
/// ```ignore
211
211
+
/// use axum::middleware;
212
212
+
/// use weaver_common::telemetry::http_metrics;
213
213
+
///
214
214
+
/// let app = Router::new()
215
215
+
/// .route("/", get(handler))
216
216
+
/// .layer(middleware::from_fn(http_metrics));
217
217
+
/// ```
218
218
+
#[cfg(feature = "telemetry")]
219
219
+
pub async fn http_metrics(
220
220
+
req: axum::extract::Request,
221
221
+
next: axum::middleware::Next,
222
222
+
) -> axum::response::Response {
223
223
+
let start = std::time::Instant::now();
224
224
+
let method = req.method().to_string();
225
225
+
let path = req.uri().path().to_string();
226
226
+
227
227
+
let response = next.run(req).await;
228
228
+
229
229
+
let duration = start.elapsed().as_secs_f64();
230
230
+
let status = response.status().as_u16().to_string();
231
231
+
232
232
+
metrics::counter!(
233
233
+
"http_requests_total",
234
234
+
"method" => method.clone(),
235
235
+
"path" => normalize_path(&path),
236
236
+
"status" => status
237
237
+
)
238
238
+
.increment(1);
239
239
+
240
240
+
metrics::histogram!(
241
241
+
"http_request_duration_seconds",
242
242
+
"method" => method,
243
243
+
"path" => normalize_path(&path)
244
244
+
)
245
245
+
.record(duration);
246
246
+
247
247
+
response
248
248
+
}
249
249
+
250
250
+
/// Normalize path for metrics labels.
251
251
+
/// Keeps the first 3 non-empty segments and collapses anything deeper into a trailing `/*` to reduce label cardinality.
252
252
+
#[cfg(feature = "telemetry")]
253
253
+
fn normalize_path(path: &str) -> String {
    // Number of leading segments kept verbatim in the label.
    const MAX_SEGMENTS: usize = 3;

    // Empty segments (leading, trailing or doubled slashes) are ignored.
    let mut segments = path.split('/').filter(|segment| !segment.is_empty());

    // Single pass: append up to MAX_SEGMENTS segments, each prefixed by '/'.
    // `by_ref` leaves any remaining segments in the iterator for the check below.
    let mut normalized = String::new();
    for segment in segments.by_ref().take(MAX_SEGMENTS) {
        normalized.push('/');
        normalized.push_str(segment);
    }

    // No segments at all means the root path.
    if normalized.is_empty() {
        return "/".to_string();
    }

    // Anything deeper than MAX_SEGMENTS collapses into one wildcard.
    if segments.next().is_some() {
        normalized.push_str("/*");
    }

    normalized
}
+2
-1
crates/weaver-index/src/server.rs
···
2
2
use std::sync::Arc;
3
3
4
4
use axum::{
5
5
-
Json, Router,
5
5
+
Json, Router, middleware,
6
6
extract::State,
7
7
http::{StatusCode, header},
8
8
response::{Html, IntoResponse},
···
166
166
.merge(GetEditHistoryRequest::into_router(edit::get_edit_history))
167
167
.merge(GetContributorsRequest::into_router(edit::get_contributors))
168
168
.merge(ListDraftsRequest::into_router(edit::list_drafts))
169
169
+
.layer(middleware::from_fn(telemetry::http_metrics))
169
170
.layer(TraceLayer::new_for_http())
170
171
.layer(CorsLayer::permissive().max_age(std::time::Duration::from_secs(86400)))
171
172
.with_state(state)
+58
-5
docker-compose.yml
···
6
6
ports:
7
7
- "80:80"
8
8
- "443:443"
9
9
+
- "2019:2019" # Admin API with metrics
9
10
volumes:
10
11
- ./infra/caddy/Caddyfile:/etc/caddy/Caddyfile:ro
11
12
- caddy_data:/data
12
13
- caddy_config:/config
13
14
environment:
14
15
CLOUDFLARE_API_TOKEN: ${CLOUDFLARE_API_TOKEN}
15
15
-
ACME_EMAIL: ${ACME_EMAIL:-admin@weaver.sh}
16
16
+
ACME_EMAIL: ${ACME_EMAIL:-contact@weaver.sh}
16
17
depends_on:
17
18
- weaver-app
18
19
- index
19
20
restart: unless-stopped
20
21
21
21
-
# ClickHouse - analytics database (internal only, no host ports exposed)
22
22
+
# ClickHouse - analytics database
22
23
clickhouse:
23
24
image: clickhouse/clickhouse-server:25.11
24
25
container_name: weaver-clickhouse
25
25
-
# No ports exposed to host - only accessible via docker network
26
26
ports:
27
27
- "8123:8123"
28
28
- "9000:9000"
29
29
+
- "9363:9363" # Prometheus metrics
29
30
volumes:
30
31
- ~/data/clickhouse:/var/lib/clickhouse
31
32
- ~/data/clickhouse-logs:/var/log/clickhouse-server
32
33
- ~/data/clickhouse-config:/etc/clickhouse-server/config.d
34
34
+
- ./infra/clickhouse/prometheus.xml:/etc/clickhouse-server/config.d/prometheus.xml:ro
33
35
environment:
34
36
CLICKHOUSE_DB: ${CLICKHOUSE_DATABASE:-weaver}
35
37
CLICKHOUSE_USER: ${CLICKHOUSE_USER:-default}
···
62
64
image: ghcr.io/bluesky-social/indigo/tap:latest
63
65
ports:
64
66
- "2480:2480"
67
67
+
- "2481:2481"
65
68
volumes:
66
69
- tap_data:/data/tap
67
70
environment:
68
71
TAP_DATABASE_URL: sqlite:///data/tap/tap.db
69
72
TAP_BIND: ":2480"
73
73
+
TAP_METRICS_LISTEN: ":2481"
70
74
TAP_DISABLE_ACKS: "false"
71
71
-
TAP_LOG_LEVEL: info
75
75
+
TAP_LOG_LEVEL: debug
72
76
TAP_OUTBOX_PARALLELISM: 5
73
77
#TAP_FULL_NETWORK: true
74
78
#TAP_SIGNAL_COLLECTION: place.stream.chat.profile
···
92
96
- index_data:/app/data
93
97
environment:
94
98
RUST_LOG: info,weaver_index=debug,hyper_util::client::legacy::pool=info
99
99
+
LOKI_URL: ${LOKI_URL:-}
95
100
CLICKHOUSE_URL: http://clickhouse:8123
96
101
CLICKHOUSE_DATABASE: ${CLICKHOUSE_DATABASE:-weaver}
97
102
CLICKHOUSE_USER: ${CLICKHOUSE_USER:-default}
···
122
127
environment:
123
128
PORT: 8080
124
129
IP: 0.0.0.0
125
125
-
RUST_LOG: info
130
130
+
RUST_LOG: info,weaver-app=debug,weaver-common=debug,hyper=warn,hyper_util=warn,tower=warn,h2=warn,rustls=warn,reqwest=info,dioxus_core=warn,dioxus_signals=warn
131
131
+
LOKI_URL: ${LOKI_URL:-}
126
132
healthcheck:
127
133
test: ["CMD", "wget", "-q", "--spider", "http://localhost:8080/"]
128
134
interval: 20s
129
135
timeout: 5s
130
136
retries: 3
137
137
+
restart: unless-stopped
138
138
+
139
139
+
# ============ OBSERVABILITY STACK ============
140
140
+
141
141
+
# Node exporter - host metrics (CPU, memory, disk, network)
142
142
+
node-exporter:
143
143
+
image: prom/node-exporter:latest
144
144
+
container_name: weaver-node-exporter
145
145
+
ports:
146
146
+
- "9100:9100"
147
147
+
volumes:
148
148
+
- /proc:/host/proc:ro
149
149
+
- /sys:/host/sys:ro
150
150
+
- /:/rootfs:ro
151
151
+
command:
152
152
+
- "--path.procfs=/host/proc"
153
153
+
- "--path.sysfs=/host/sys"
154
154
+
- "--path.rootfs=/rootfs"
155
155
+
- "--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)"
156
156
+
restart: unless-stopped
157
157
+
158
158
+
# cAdvisor - container metrics (per-container CPU, memory, network)
159
159
+
cadvisor:
160
160
+
image: gcr.io/cadvisor/cadvisor:latest
161
161
+
container_name: weaver-cadvisor
162
162
+
ports:
163
163
+
- "9080:8080"
164
164
+
volumes:
165
165
+
- /:/rootfs:ro
166
166
+
- /var/run:/var/run:ro
167
167
+
- /sys:/sys:ro
168
168
+
- /var/lib/docker/:/var/lib/docker:ro
169
169
+
- /dev/disk/:/dev/disk:ro
170
170
+
privileged: true
171
171
+
restart: unless-stopped
172
172
+
173
173
+
# Promtail - ship container logs to Loki
174
174
+
promtail:
175
175
+
image: grafana/promtail:latest
176
176
+
container_name: weaver-promtail
177
177
+
volumes:
178
178
+
- ./infra/promtail/config.yml:/etc/promtail/config.yml:ro
179
179
+
- /var/lib/docker/containers:/var/lib/docker/containers:ro
180
180
+
- /var/run/docker.sock:/var/run/docker.sock:ro
181
181
+
command: -config.file=/etc/promtail/config.yml -config.expand-env=true
182
182
+
environment:
183
183
+
LOKI_URL: ${LOKI_URL:-http://localhost:3100}
131
184
restart: unless-stopped
132
185
133
186
volumes:
+6
infra/caddy/Caddyfile
···
4
4
on_demand_tls {
5
5
ask http://index:3000/internal/verify-domain
6
6
}
7
7
+
8
8
+
admin 0.0.0.0:2019
9
9
+
10
10
+
servers {
11
11
+
metrics
12
12
+
}
7
13
}
8
14
9
15
# Index service
+9
infra/clickhouse/prometheus.xml
···
1
1
+
<clickhouse>
2
2
+
<prometheus>
3
3
+
<endpoint>/metrics</endpoint>
4
4
+
<port>9363</port>
5
5
+
<metrics>true</metrics>
6
6
+
<events>true</events>
7
7
+
<asynchronous_metrics>true</asynchronous_metrics>
8
8
+
</prometheus>
9
9
+
</clickhouse>
+414
infra/grafana/dashboards/weaver-clickhouse.json
···
1
1
+
{
2
2
+
"annotations": { "list": [] },
3
3
+
"editable": true,
4
4
+
"fiscalYearStartMonth": 0,
5
5
+
"graphTooltip": 1,
6
6
+
"links": [],
7
7
+
"panels": [
8
8
+
{
9
9
+
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 },
10
10
+
"id": 100,
11
11
+
"title": "ClickHouse Overview",
12
12
+
"type": "row"
13
13
+
},
14
14
+
{
15
15
+
"datasource": { "type": "prometheus", "uid": "${datasource}" },
16
16
+
"fieldConfig": {
17
17
+
"defaults": {
18
18
+
"color": { "mode": "thresholds" },
19
19
+
"mappings": [],
20
20
+
"thresholds": {
21
21
+
"mode": "absolute",
22
22
+
"steps": [
23
23
+
{ "color": "red", "value": null },
24
24
+
{ "color": "green", "value": 1 }
25
25
+
]
26
26
+
}
27
27
+
}
28
28
+
},
29
29
+
"gridPos": { "h": 4, "w": 4, "x": 0, "y": 1 },
30
30
+
"id": 1,
31
31
+
"options": {
32
32
+
"colorMode": "background",
33
33
+
"graphMode": "none",
34
34
+
"justifyMode": "auto",
35
35
+
"orientation": "auto",
36
36
+
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }
37
37
+
},
38
38
+
"targets": [
39
39
+
{
40
40
+
"expr": "up{service=\"weaver-clickhouse\"}",
41
41
+
"refId": "A"
42
42
+
}
43
43
+
],
44
44
+
"title": "Status",
45
45
+
"type": "stat"
46
46
+
},
47
47
+
{
48
48
+
"datasource": { "type": "prometheus", "uid": "${datasource}" },
49
49
+
"fieldConfig": {
50
50
+
"defaults": {
51
51
+
"color": { "mode": "thresholds" },
52
52
+
"mappings": [],
53
53
+
"thresholds": {
54
54
+
"mode": "absolute",
55
55
+
"steps": [
56
56
+
{ "color": "green", "value": null },
57
57
+
{ "color": "yellow", "value": 50 },
58
58
+
{ "color": "red", "value": 100 }
59
59
+
]
60
60
+
}
61
61
+
}
62
62
+
},
63
63
+
"gridPos": { "h": 4, "w": 4, "x": 4, "y": 1 },
64
64
+
"id": 2,
65
65
+
"options": {
66
66
+
"colorMode": "value",
67
67
+
"graphMode": "area",
68
68
+
"justifyMode": "auto",
69
69
+
"orientation": "auto",
70
70
+
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }
71
71
+
},
72
72
+
"targets": [
73
73
+
{
74
74
+
"expr": "ClickHouseMetrics_Query{service=\"weaver-clickhouse\"}",
75
75
+
"refId": "A"
76
76
+
}
77
77
+
],
78
78
+
"title": "Active Queries",
79
79
+
"type": "stat"
80
80
+
},
81
81
+
{
82
82
+
"datasource": { "type": "prometheus", "uid": "${datasource}" },
83
83
+
"fieldConfig": {
84
84
+
"defaults": {
85
85
+
"color": { "mode": "thresholds" },
86
86
+
"mappings": [],
87
87
+
"unit": "bytes",
88
88
+
"thresholds": {
89
89
+
"mode": "absolute",
90
90
+
"steps": [
91
91
+
{ "color": "green", "value": null },
92
92
+
{ "color": "yellow", "value": 4294967296 },
93
93
+
{ "color": "red", "value": 8589934592 }
94
94
+
]
95
95
+
}
96
96
+
}
97
97
+
},
98
98
+
"gridPos": { "h": 4, "w": 4, "x": 8, "y": 1 },
99
99
+
"id": 3,
100
100
+
"options": {
101
101
+
"colorMode": "value",
102
102
+
"graphMode": "area",
103
103
+
"justifyMode": "auto",
104
104
+
"orientation": "auto",
105
105
+
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }
106
106
+
},
107
107
+
"targets": [
108
108
+
{
109
109
+
"expr": "ClickHouseMetrics_MemoryTracking{service=\"weaver-clickhouse\"}",
110
110
+
"refId": "A"
111
111
+
}
112
112
+
],
113
113
+
"title": "Memory Used",
114
114
+
"type": "stat"
115
115
+
},
116
116
+
{
117
117
+
"datasource": { "type": "prometheus", "uid": "${datasource}" },
118
118
+
"fieldConfig": {
119
119
+
"defaults": {
120
120
+
"color": { "mode": "thresholds" },
121
121
+
"mappings": [],
122
122
+
"thresholds": {
123
123
+
"mode": "absolute",
124
124
+
"steps": [
125
125
+
{ "color": "green", "value": null },
126
126
+
{ "color": "yellow", "value": 50 },
127
127
+
{ "color": "red", "value": 100 }
128
128
+
]
129
129
+
}
130
130
+
}
131
131
+
},
132
132
+
"gridPos": { "h": 4, "w": 4, "x": 12, "y": 1 },
133
133
+
"id": 4,
134
134
+
"options": {
135
135
+
"colorMode": "value",
136
136
+
"graphMode": "area",
137
137
+
"justifyMode": "auto",
138
138
+
"orientation": "auto",
139
139
+
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }
140
140
+
},
141
141
+
"targets": [
142
142
+
{
143
143
+
"expr": "ClickHouseMetrics_TCPConnection{service=\"weaver-clickhouse\"}",
144
144
+
"refId": "A"
145
145
+
}
146
146
+
],
147
147
+
"title": "TCP Connections",
148
148
+
"type": "stat"
149
149
+
},
150
150
+
{
151
151
+
"datasource": { "type": "prometheus", "uid": "${datasource}" },
152
152
+
"fieldConfig": {
153
153
+
"defaults": {
154
154
+
"color": { "mode": "thresholds" },
155
155
+
"mappings": [],
156
156
+
"thresholds": {
157
157
+
"mode": "absolute",
158
158
+
"steps": [
159
159
+
{ "color": "green", "value": null },
160
160
+
{ "color": "yellow", "value": 50 },
161
161
+
{ "color": "red", "value": 100 }
162
162
+
]
163
163
+
}
164
164
+
}
165
165
+
},
166
166
+
"gridPos": { "h": 4, "w": 4, "x": 16, "y": 1 },
167
167
+
"id": 5,
168
168
+
"options": {
169
169
+
"colorMode": "value",
170
170
+
"graphMode": "area",
171
171
+
"justifyMode": "auto",
172
172
+
"orientation": "auto",
173
173
+
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }
174
174
+
},
175
175
+
"targets": [
176
176
+
{
177
177
+
"expr": "ClickHouseMetrics_HTTPConnection{service=\"weaver-clickhouse\"}",
178
178
+
"refId": "A"
179
179
+
}
180
180
+
],
181
181
+
"title": "HTTP Connections",
182
182
+
"type": "stat"
183
183
+
},
184
184
+
{
185
185
+
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 5 },
186
186
+
"id": 101,
187
187
+
"title": "Query Performance",
188
188
+
"type": "row"
189
189
+
},
190
190
+
{
191
191
+
"datasource": { "type": "prometheus", "uid": "${datasource}" },
192
192
+
"fieldConfig": {
193
193
+
"defaults": {
194
194
+
"color": { "mode": "palette-classic" },
195
195
+
"unit": "short"
196
196
+
}
197
197
+
},
198
198
+
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 6 },
199
199
+
"id": 6,
200
200
+
"options": {
201
201
+
"legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom" },
202
202
+
"tooltip": { "mode": "multi" }
203
203
+
},
204
204
+
"targets": [
205
205
+
{
206
206
+
"expr": "rate(ClickHouseProfileEvents_Query{service=\"weaver-clickhouse\"}[5m])",
207
207
+
"legendFormat": "Queries/s",
208
208
+
"refId": "A"
209
209
+
},
210
210
+
{
211
211
+
"expr": "rate(ClickHouseProfileEvents_SelectQuery{service=\"weaver-clickhouse\"}[5m])",
212
212
+
"legendFormat": "Selects/s",
213
213
+
"refId": "B"
214
214
+
},
215
215
+
{
216
216
+
"expr": "rate(ClickHouseProfileEvents_InsertQuery{service=\"weaver-clickhouse\"}[5m])",
217
217
+
"legendFormat": "Inserts/s",
218
218
+
"refId": "C"
219
219
+
}
220
220
+
],
221
221
+
"title": "Query Rate",
222
222
+
"type": "timeseries"
223
223
+
},
224
224
+
{
225
225
+
"datasource": { "type": "prometheus", "uid": "${datasource}" },
226
226
+
"fieldConfig": {
227
227
+
"defaults": {
228
228
+
"color": { "mode": "palette-classic" },
229
229
+
"unit": "short"
230
230
+
}
231
231
+
},
232
232
+
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 6 },
233
233
+
"id": 7,
234
234
+
"options": {
235
235
+
"legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom" },
236
236
+
"tooltip": { "mode": "multi" }
237
237
+
},
238
238
+
"targets": [
239
239
+
{
240
240
+
"expr": "rate(ClickHouseProfileEvents_FailedQuery{service=\"weaver-clickhouse\"}[5m])",
241
241
+
"legendFormat": "Failed",
242
242
+
"refId": "A"
243
243
+
},
244
244
+
{
245
245
+
"expr": "rate(ClickHouseProfileEvents_FailedSelectQuery{service=\"weaver-clickhouse\"}[5m])",
246
246
+
"legendFormat": "Failed Selects",
247
247
+
"refId": "B"
248
248
+
},
249
249
+
{
250
250
+
"expr": "rate(ClickHouseProfileEvents_FailedInsertQuery{service=\"weaver-clickhouse\"}[5m])",
251
251
+
"legendFormat": "Failed Inserts",
252
252
+
"refId": "C"
253
253
+
}
254
254
+
],
255
255
+
"title": "Failed Queries",
256
256
+
"type": "timeseries"
257
257
+
},
258
258
+
{
259
259
+
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 14 },
260
260
+
"id": 102,
261
261
+
"title": "Resources",
262
262
+
"type": "row"
263
263
+
},
264
264
+
{
265
265
+
"datasource": { "type": "prometheus", "uid": "${datasource}" },
266
266
+
"fieldConfig": {
267
267
+
"defaults": {
268
268
+
"color": { "mode": "palette-classic" },
269
269
+
"unit": "bytes"
270
270
+
}
271
271
+
},
272
272
+
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 15 },
273
273
+
"id": 8,
274
274
+
"options": {
275
275
+
"legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom" },
276
276
+
"tooltip": { "mode": "multi" }
277
277
+
},
278
278
+
"targets": [
279
279
+
{
280
280
+
"expr": "ClickHouseMetrics_MemoryTracking{service=\"weaver-clickhouse\"}",
281
281
+
"legendFormat": "Memory Tracking",
282
282
+
"refId": "A"
283
283
+
},
284
284
+
{
285
285
+
"expr": "ClickHouseAsyncMetrics_MemoryResident{service=\"weaver-clickhouse\"}",
286
286
+
"legendFormat": "Resident",
287
287
+
"refId": "B"
288
288
+
}
289
289
+
],
290
290
+
"title": "Memory Usage",
291
291
+
"type": "timeseries"
292
292
+
},
293
293
+
{
294
294
+
"datasource": { "type": "prometheus", "uid": "${datasource}" },
295
295
+
"fieldConfig": {
296
296
+
"defaults": {
297
297
+
"color": { "mode": "palette-classic" },
298
298
+
"unit": "Bps"
299
299
+
}
300
300
+
},
301
301
+
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 15 },
302
302
+
"id": 9,
303
303
+
"options": {
304
304
+
"legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom" },
305
305
+
"tooltip": { "mode": "multi" }
306
306
+
},
307
307
+
"targets": [
308
308
+
{
309
309
+
"expr": "rate(ClickHouseProfileEvents_ReadBufferFromFileDescriptorReadBytes{service=\"weaver-clickhouse\"}[5m])",
310
310
+
"legendFormat": "Read",
311
311
+
"refId": "A"
312
312
+
},
313
313
+
{
314
314
+
"expr": "rate(ClickHouseProfileEvents_WriteBufferFromFileDescriptorWriteBytes{service=\"weaver-clickhouse\"}[5m])",
315
315
+
"legendFormat": "Write",
316
316
+
"refId": "B"
317
317
+
}
318
318
+
],
319
319
+
"title": "Disk I/O",
320
320
+
"type": "timeseries"
321
321
+
},
322
322
+
{
323
323
+
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 23 },
324
324
+
"id": 103,
325
325
+
"title": "Merges & Parts",
326
326
+
"type": "row"
327
327
+
},
328
328
+
{
329
329
+
"datasource": { "type": "prometheus", "uid": "${datasource}" },
330
330
+
"fieldConfig": {
331
331
+
"defaults": {
332
332
+
"color": { "mode": "palette-classic" },
333
333
+
"unit": "short"
334
334
+
}
335
335
+
},
336
336
+
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 24 },
337
337
+
"id": 10,
338
338
+
"options": {
339
339
+
"legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom" },
340
340
+
"tooltip": { "mode": "multi" }
341
341
+
},
342
342
+
"targets": [
343
343
+
{
344
344
+
"expr": "ClickHouseMetrics_Merge{service=\"weaver-clickhouse\"}",
345
345
+
"legendFormat": "Active Merges",
346
346
+
"refId": "A"
347
347
+
},
348
348
+
{
349
349
+
"expr": "ClickHouseMetrics_BackgroundMergesAndMutationsPoolTask{service=\"weaver-clickhouse\"}",
350
350
+
"legendFormat": "Pool Tasks",
351
351
+
"refId": "B"
352
352
+
}
353
353
+
],
354
354
+
"title": "Merge Activity",
355
355
+
"type": "timeseries"
356
356
+
},
357
357
+
{
358
358
+
"datasource": { "type": "prometheus", "uid": "${datasource}" },
359
359
+
"fieldConfig": {
360
360
+
"defaults": {
361
361
+
"color": { "mode": "palette-classic" },
362
362
+
"unit": "short"
363
363
+
}
364
364
+
},
365
365
+
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 24 },
366
366
+
"id": 11,
367
367
+
"options": {
368
368
+
"legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom" },
369
369
+
"tooltip": { "mode": "multi" }
370
370
+
},
371
371
+
"targets": [
372
372
+
{
373
373
+
"expr": "ClickHouseMetrics_PartsActive{service=\"weaver-clickhouse\"}",
374
374
+
"legendFormat": "Active Parts",
375
375
+
"refId": "A"
376
376
+
},
377
377
+
{
378
378
+
"expr": "ClickHouseMetrics_PartsOutdated{service=\"weaver-clickhouse\"}",
379
379
+
"legendFormat": "Outdated Parts",
380
380
+
"refId": "B"
381
381
+
}
382
382
+
],
383
383
+
"title": "Parts",
384
384
+
"type": "timeseries"
385
385
+
}
386
386
+
],
387
387
+
"schemaVersion": 39,
388
388
+
"tags": ["weaver", "clickhouse"],
389
389
+
"templating": {
390
390
+
"list": [
391
391
+
{
392
392
+
"current": { "selected": false, "text": "Prometheus", "value": "Prometheus" },
393
393
+
"hide": 0,
394
394
+
"includeAll": false,
395
395
+
"label": "Datasource",
396
396
+
"multi": false,
397
397
+
"name": "datasource",
398
398
+
"options": [],
399
399
+
"query": "prometheus",
400
400
+
"queryValue": "",
401
401
+
"refresh": 1,
402
402
+
"regex": "",
403
403
+
"skipUrlSync": false,
404
404
+
"type": "datasource"
405
405
+
}
406
406
+
]
407
407
+
},
408
408
+
"time": { "from": "now-1h", "to": "now" },
409
409
+
"timepicker": {},
410
410
+
"timezone": "browser",
411
411
+
"title": "Weaver ClickHouse",
412
412
+
"uid": "weaver-clickhouse",
413
413
+
"version": 1
414
414
+
}
+294
infra/grafana/dashboards/weaver-infra.json
···
1
1
+
{
2
2
+
"annotations": { "list": [] },
3
3
+
"editable": true,
4
4
+
"fiscalYearStartMonth": 0,
5
5
+
"graphTooltip": 1,
6
6
+
"links": [],
7
7
+
"panels": [
8
8
+
{
9
9
+
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 },
10
10
+
"id": 100,
11
11
+
"title": "Host Metrics (booskie-box)",
12
12
+
"type": "row"
13
13
+
},
14
14
+
{
15
15
+
"datasource": { "type": "prometheus", "uid": "${datasource}" },
16
16
+
"fieldConfig": {
17
17
+
"defaults": {
18
18
+
"color": { "mode": "palette-classic" },
19
19
+
"unit": "percentunit",
20
20
+
"min": 0,
21
21
+
"max": 1
22
22
+
}
23
23
+
},
24
24
+
"gridPos": { "h": 6, "w": 8, "x": 0, "y": 1 },
25
25
+
"id": 1,
26
26
+
"options": {
27
27
+
"legend": { "displayMode": "list", "placement": "bottom" },
28
28
+
"tooltip": { "mode": "multi" }
29
29
+
},
30
30
+
"targets": [
31
31
+
{
32
32
+
"expr": "1 - avg(rate(node_cpu_seconds_total{service=\"weaver-node\", mode=\"idle\"}[5m]))",
33
33
+
"legendFormat": "CPU Usage",
34
34
+
"refId": "A"
35
35
+
}
36
36
+
],
37
37
+
"title": "CPU Usage",
38
38
+
"type": "timeseries"
39
39
+
},
40
40
+
{
41
41
+
"datasource": { "type": "prometheus", "uid": "${datasource}" },
42
42
+
"fieldConfig": {
43
43
+
"defaults": {
44
44
+
"color": { "mode": "palette-classic" },
45
45
+
"unit": "bytes"
46
46
+
}
47
47
+
},
48
48
+
"gridPos": { "h": 6, "w": 8, "x": 8, "y": 1 },
49
49
+
"id": 2,
50
50
+
"options": {
51
51
+
"legend": { "displayMode": "list", "placement": "bottom" },
52
52
+
"tooltip": { "mode": "multi" }
53
53
+
},
54
54
+
"targets": [
55
55
+
{
56
56
+
"expr": "node_memory_MemTotal_bytes{service=\"weaver-node\"} - node_memory_MemAvailable_bytes{service=\"weaver-node\"}",
57
57
+
"legendFormat": "Used",
58
58
+
"refId": "A"
59
59
+
},
60
60
+
{
61
61
+
"expr": "node_memory_MemAvailable_bytes{service=\"weaver-node\"}",
62
62
+
"legendFormat": "Available",
63
63
+
"refId": "B"
64
64
+
}
65
65
+
],
66
66
+
"title": "Memory",
67
67
+
"type": "timeseries"
68
68
+
},
69
69
+
{
70
70
+
"datasource": { "type": "prometheus", "uid": "${datasource}" },
71
71
+
"fieldConfig": {
72
72
+
"defaults": {
73
73
+
"color": { "mode": "palette-classic" },
74
74
+
"unit": "percentunit",
75
75
+
"min": 0,
76
76
+
"max": 1
77
77
+
}
78
78
+
},
79
79
+
"gridPos": { "h": 6, "w": 8, "x": 16, "y": 1 },
80
80
+
"id": 3,
81
81
+
"options": {
82
82
+
"legend": { "displayMode": "list", "placement": "bottom" },
83
83
+
"tooltip": { "mode": "multi" }
84
84
+
},
85
85
+
"targets": [
86
86
+
{
87
87
+
"expr": "1 - (node_filesystem_avail_bytes{service=\"weaver-node\", mountpoint=\"/\"} / node_filesystem_size_bytes{service=\"weaver-node\", mountpoint=\"/\"})",
88
88
+
"legendFormat": "/ usage",
89
89
+
"refId": "A"
90
90
+
}
91
91
+
],
92
92
+
"title": "Disk Usage",
93
93
+
"type": "timeseries"
94
94
+
},
95
95
+
{
96
96
+
"datasource": { "type": "prometheus", "uid": "${datasource}" },
97
97
+
"fieldConfig": {
98
98
+
"defaults": {
99
99
+
"color": { "mode": "palette-classic" },
100
100
+
"unit": "Bps"
101
101
+
}
102
102
+
},
103
103
+
"gridPos": { "h": 6, "w": 12, "x": 0, "y": 7 },
104
104
+
"id": 4,
105
105
+
"options": {
106
106
+
"legend": { "displayMode": "list", "placement": "bottom" },
107
107
+
"tooltip": { "mode": "multi" }
108
108
+
},
109
109
+
"targets": [
110
110
+
{
111
111
+
"expr": "rate(node_network_receive_bytes_total{service=\"weaver-node\", device!~\"lo|veth.*|docker.*|br-.*\"}[5m])",
112
112
+
"legendFormat": "{{device}} rx",
113
113
+
"refId": "A"
114
114
+
},
115
115
+
{
116
116
+
"expr": "-rate(node_network_transmit_bytes_total{service=\"weaver-node\", device!~\"lo|veth.*|docker.*|br-.*\"}[5m])",
117
117
+
"legendFormat": "{{device}} tx",
118
118
+
"refId": "B"
119
119
+
}
120
120
+
],
121
121
+
"title": "Network I/O",
122
122
+
"type": "timeseries"
123
123
+
},
124
124
+
{
125
125
+
"datasource": { "type": "prometheus", "uid": "${datasource}" },
126
126
+
"fieldConfig": {
127
127
+
"defaults": {
128
128
+
"color": { "mode": "palette-classic" },
129
129
+
"unit": "Bps"
130
130
+
}
131
131
+
},
132
132
+
"gridPos": { "h": 6, "w": 12, "x": 12, "y": 7 },
133
133
+
"id": 5,
134
134
+
"options": {
135
135
+
"legend": { "displayMode": "list", "placement": "bottom" },
136
136
+
"tooltip": { "mode": "multi" }
137
137
+
},
138
138
+
"targets": [
139
139
+
{
140
140
+
"expr": "rate(node_disk_read_bytes_total{service=\"weaver-node\"}[5m])",
141
141
+
"legendFormat": "{{device}} read",
142
142
+
"refId": "A"
143
143
+
},
144
144
+
{
145
145
+
"expr": "-rate(node_disk_written_bytes_total{service=\"weaver-node\"}[5m])",
146
146
+
"legendFormat": "{{device}} write",
147
147
+
"refId": "B"
148
148
+
}
149
149
+
],
150
150
+
"title": "Disk I/O",
151
151
+
"type": "timeseries"
152
152
+
},
153
153
+
{
154
154
+
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 13 },
155
155
+
"id": 101,
156
156
+
"title": "Container Metrics",
157
157
+
"type": "row"
158
158
+
},
159
159
+
{
160
160
+
"datasource": { "type": "prometheus", "uid": "${datasource}" },
161
161
+
"fieldConfig": {
162
162
+
"defaults": {
163
163
+
"color": { "mode": "palette-classic" },
164
164
+
"unit": "percentunit"
165
165
+
}
166
166
+
},
167
167
+
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 14 },
168
168
+
"id": 6,
169
169
+
"options": {
170
170
+
"legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom" },
171
171
+
"tooltip": { "mode": "multi" }
172
172
+
},
173
173
+
"targets": [
174
174
+
{
175
175
+
"expr": "rate(container_cpu_usage_seconds_total{service=\"weaver-cadvisor\", name=~\"weaver-.*\"}[5m])",
176
176
+
"legendFormat": "{{name}}",
177
177
+
"refId": "A"
178
178
+
}
179
179
+
],
180
180
+
"title": "Container CPU Usage",
181
181
+
"type": "timeseries"
182
182
+
},
183
183
+
{
184
184
+
"datasource": { "type": "prometheus", "uid": "${datasource}" },
185
185
+
"fieldConfig": {
186
186
+
"defaults": {
187
187
+
"color": { "mode": "palette-classic" },
188
188
+
"unit": "bytes"
189
189
+
}
190
190
+
},
191
191
+
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 14 },
192
192
+
"id": 7,
193
193
+
"options": {
194
194
+
"legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom" },
195
195
+
"tooltip": { "mode": "multi" }
196
196
+
},
197
197
+
"targets": [
198
198
+
{
199
199
+
"expr": "container_memory_usage_bytes{service=\"weaver-cadvisor\", name=~\"weaver-.*\"}",
200
200
+
"legendFormat": "{{name}}",
201
201
+
"refId": "A"
202
202
+
}
203
203
+
],
204
204
+
"title": "Container Memory Usage",
205
205
+
"type": "timeseries"
206
206
+
},
207
207
+
{
208
208
+
"datasource": { "type": "prometheus", "uid": "${datasource}" },
209
209
+
"fieldConfig": {
210
210
+
"defaults": {
211
211
+
"color": { "mode": "palette-classic" },
212
212
+
"unit": "Bps"
213
213
+
}
214
214
+
},
215
215
+
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 22 },
216
216
+
"id": 8,
217
217
+
"options": {
218
218
+
"legend": { "calcs": ["mean"], "displayMode": "table", "placement": "bottom" },
219
219
+
"tooltip": { "mode": "multi" }
220
220
+
},
221
221
+
"targets": [
222
222
+
{
223
223
+
"expr": "rate(container_network_receive_bytes_total{service=\"weaver-cadvisor\", name=~\"weaver-.*\"}[5m])",
224
224
+
"legendFormat": "{{name}} rx",
225
225
+
"refId": "A"
226
226
+
},
227
227
+
{
228
228
+
"expr": "-rate(container_network_transmit_bytes_total{service=\"weaver-cadvisor\", name=~\"weaver-.*\"}[5m])",
229
229
+
"legendFormat": "{{name}} tx",
230
230
+
"refId": "B"
231
231
+
}
232
232
+
],
233
233
+
"title": "Container Network I/O",
234
234
+
"type": "timeseries"
235
235
+
},
236
236
+
{
237
237
+
"datasource": { "type": "prometheus", "uid": "${datasource}" },
238
238
+
"fieldConfig": {
239
239
+
"defaults": {
240
240
+
"color": { "mode": "palette-classic" },
241
241
+
"unit": "short"
242
242
+
}
243
243
+
},
244
244
+
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 22 },
245
245
+
"id": 9,
246
246
+
"options": {
247
247
+
"legend": { "displayMode": "table", "placement": "bottom" },
248
248
+
"tooltip": { "mode": "multi" }
249
249
+
},
250
250
+
"targets": [
251
251
+
{
252
252
+
"expr": "container_last_seen{service=\"weaver-cadvisor\", name=~\"weaver-.*\"} - time()",
253
253
+
"legendFormat": "{{name}}",
254
254
+
"refId": "A",
255
255
+
"hide": true
256
256
+
},
257
257
+
{
258
258
+
"expr": "count(container_last_seen{service=\"weaver-cadvisor\", name=~\"weaver-.*\"} > (time() - 60))",
259
259
+
"legendFormat": "Running containers",
260
260
+
"refId": "B"
261
261
+
}
262
262
+
],
263
263
+
"title": "Running Containers",
264
264
+
"type": "stat"
265
265
+
}
266
266
+
],
267
267
+
"schemaVersion": 39,
268
268
+
"tags": ["weaver", "infrastructure"],
269
269
+
"templating": {
270
270
+
"list": [
271
271
+
{
272
272
+
"current": { "selected": false, "text": "Prometheus", "value": "Prometheus" },
273
273
+
"hide": 0,
274
274
+
"includeAll": false,
275
275
+
"label": "Datasource",
276
276
+
"multi": false,
277
277
+
"name": "datasource",
278
278
+
"options": [],
279
279
+
"query": "prometheus",
280
280
+
"queryValue": "",
281
281
+
"refresh": 1,
282
282
+
"regex": "",
283
283
+
"skipUrlSync": false,
284
284
+
"type": "datasource"
285
285
+
}
286
286
+
]
287
287
+
},
288
288
+
"time": { "from": "now-1h", "to": "now" },
289
289
+
"timepicker": {},
290
290
+
"timezone": "browser",
291
291
+
"title": "Weaver Infrastructure",
292
292
+
"uid": "weaver-infra",
293
293
+
"version": 1
294
294
+
}
+943
infra/grafana/dashboards/weaver-overview.json
···
1
1
+
{
2
2
+
"annotations": {
3
3
+
"list": [
4
4
+
{
5
5
+
"builtIn": 1,
6
6
+
"datasource": {
7
7
+
"type": "grafana",
8
8
+
"uid": "-- Grafana --"
9
9
+
},
10
10
+
"enable": true,
11
11
+
"hide": true,
12
12
+
"iconColor": "rgba(0, 211, 255, 1)",
13
13
+
"name": "Annotations & Alerts",
14
14
+
"type": "dashboard"
15
15
+
}
16
16
+
]
17
17
+
},
18
18
+
"editable": true,
19
19
+
"fiscalYearStartMonth": 0,
20
20
+
"graphTooltip": 1,
21
21
+
"id": 0,
22
22
+
"links": [],
23
23
+
"panels": [
24
24
+
{
25
25
+
"collapsed": false,
26
26
+
"gridPos": {
27
27
+
"h": 1,
28
28
+
"w": 24,
29
29
+
"x": 0,
30
30
+
"y": 0
31
31
+
},
32
32
+
"id": 100,
33
33
+
"panels": [],
34
34
+
"title": "Weaver Services",
35
35
+
"type": "row"
36
36
+
},
37
37
+
{
38
38
+
"datasource": {
39
39
+
"type": "prometheus",
40
40
+
"uid": "${datasource}"
41
41
+
},
42
42
+
"fieldConfig": {
43
43
+
"defaults": {
44
44
+
"color": {
45
45
+
"mode": "palette-classic"
46
46
+
},
47
47
+
"custom": {
48
48
+
"axisBorderShow": false,
49
49
+
"axisCenteredZero": false,
50
50
+
"axisColorMode": "text",
51
51
+
"axisLabel": "",
52
52
+
"axisPlacement": "auto",
53
53
+
"barAlignment": 0,
54
54
+
"barWidthFactor": 0.6,
55
55
+
"drawStyle": "line",
56
56
+
"fillOpacity": 0,
57
57
+
"gradientMode": "none",
58
58
+
"hideFrom": {
59
59
+
"legend": false,
60
60
+
"tooltip": false,
61
61
+
"viz": false
62
62
+
},
63
63
+
"insertNulls": false,
64
64
+
"lineInterpolation": "linear",
65
65
+
"lineWidth": 1,
66
66
+
"pointSize": 5,
67
67
+
"scaleDistribution": {
68
68
+
"type": "linear"
69
69
+
},
70
70
+
"showPoints": "auto",
71
71
+
"showValues": false,
72
72
+
"spanNulls": false,
73
73
+
"stacking": {
74
74
+
"group": "A",
75
75
+
"mode": "none"
76
76
+
},
77
77
+
"thresholdsStyle": {
78
78
+
"mode": "off"
79
79
+
}
80
80
+
},
81
81
+
"mappings": [],
82
82
+
"thresholds": {
83
83
+
"mode": "absolute",
84
84
+
"steps": [
85
85
+
{
86
86
+
"color": "green",
87
87
+
"value": 0
88
88
+
}
89
89
+
]
90
90
+
},
91
91
+
"unit": "reqps"
92
92
+
},
93
93
+
"overrides": []
94
94
+
},
95
95
+
"gridPos": {
96
96
+
"h": 8,
97
97
+
"w": 12,
98
98
+
"x": 0,
99
99
+
"y": 1
100
100
+
},
101
101
+
"id": 1,
102
102
+
"options": {
103
103
+
"legend": {
104
104
+
"calcs": ["mean", "max"],
105
105
+
"displayMode": "table",
106
106
+
"placement": "bottom",
107
107
+
"showLegend": true
108
108
+
},
109
109
+
"tooltip": {
110
110
+
"hideZeros": false,
111
111
+
"mode": "multi",
112
112
+
"sort": "none"
113
113
+
}
114
114
+
},
115
115
+
"pluginVersion": "12.3.1",
116
116
+
"targets": [
117
117
+
{
118
118
+
"expr": "rate(http_requests_total{service=~\"weaver-.*\"}[5m])",
119
119
+
"legendFormat": "{{service}} {{method}} {{status}}",
120
120
+
"refId": "A"
121
121
+
}
122
122
+
],
123
123
+
"title": "Request Rate",
124
124
+
"type": "timeseries"
125
125
+
},
126
126
+
{
127
127
+
"datasource": {
128
128
+
"type": "prometheus",
129
129
+
"uid": "${datasource}"
130
130
+
},
131
131
+
"fieldConfig": {
132
132
+
"defaults": {
133
133
+
"color": {
134
134
+
"mode": "palette-classic"
135
135
+
},
136
136
+
"custom": {
137
137
+
"axisBorderShow": false,
138
138
+
"axisCenteredZero": false,
139
139
+
"axisColorMode": "text",
140
140
+
"axisLabel": "",
141
141
+
"axisPlacement": "auto",
142
142
+
"barAlignment": 0,
143
143
+
"barWidthFactor": 0.6,
144
144
+
"drawStyle": "line",
145
145
+
"fillOpacity": 0,
146
146
+
"gradientMode": "none",
147
147
+
"hideFrom": {
148
148
+
"legend": false,
149
149
+
"tooltip": false,
150
150
+
"viz": false
151
151
+
},
152
152
+
"insertNulls": false,
153
153
+
"lineInterpolation": "linear",
154
154
+
"lineWidth": 1,
155
155
+
"pointSize": 5,
156
156
+
"scaleDistribution": {
157
157
+
"type": "linear"
158
158
+
},
159
159
+
"showPoints": "auto",
160
160
+
"showValues": false,
161
161
+
"spanNulls": false,
162
162
+
"stacking": {
163
163
+
"group": "A",
164
164
+
"mode": "none"
165
165
+
},
166
166
+
"thresholdsStyle": {
167
167
+
"mode": "off"
168
168
+
}
169
169
+
},
170
170
+
"mappings": [],
171
171
+
"thresholds": {
172
172
+
"mode": "absolute",
173
173
+
"steps": [
174
174
+
{
175
175
+
"color": "green",
176
176
+
"value": 0
177
177
+
},
178
178
+
{
179
179
+
"color": "yellow",
180
180
+
"value": 0.5
181
181
+
},
182
182
+
{
183
183
+
"color": "red",
184
184
+
"value": 1
185
185
+
}
186
186
+
]
187
187
+
},
188
188
+
"unit": "s"
189
189
+
},
190
190
+
"overrides": []
191
191
+
},
192
192
+
"gridPos": {
193
193
+
"h": 8,
194
194
+
"w": 12,
195
195
+
"x": 12,
196
196
+
"y": 1
197
197
+
},
198
198
+
"id": 2,
199
199
+
"options": {
200
200
+
"legend": {
201
201
+
"calcs": ["mean", "max", "p99"],
202
202
+
"displayMode": "table",
203
203
+
"placement": "bottom",
204
204
+
"showLegend": true
205
205
+
},
206
206
+
"tooltip": {
207
207
+
"hideZeros": false,
208
208
+
"mode": "multi",
209
209
+
"sort": "none"
210
210
+
}
211
211
+
},
212
212
+
"pluginVersion": "12.3.1",
213
213
+
"targets": [
214
214
+
{
215
215
+
"expr": "histogram_quantile(0.99, rate(http_request_duration_seconds_bucket{service=~\"weaver-.*\"}[5m]))",
216
216
+
"legendFormat": "{{service}} p99",
217
217
+
"refId": "A"
218
218
+
},
219
219
+
{
220
220
+
"expr": "histogram_quantile(0.50, rate(http_request_duration_seconds_bucket{service=~\"weaver-.*\"}[5m]))",
221
221
+
"legendFormat": "{{service}} p50",
222
222
+
"refId": "B"
223
223
+
}
224
224
+
],
225
225
+
"title": "Request Latency",
226
226
+
"type": "timeseries"
227
227
+
},
228
228
+
{
229
229
+
"datasource": {
230
230
+
"type": "prometheus",
231
231
+
"uid": "${datasource}"
232
232
+
},
233
233
+
"fieldConfig": {
234
234
+
"defaults": {
235
235
+
"color": {
236
236
+
"mode": "palette-classic"
237
237
+
},
238
238
+
"custom": {
239
239
+
"axisBorderShow": false,
240
240
+
"axisCenteredZero": false,
241
241
+
"axisColorMode": "text",
242
242
+
"axisLabel": "",
243
243
+
"axisPlacement": "auto",
244
244
+
"barAlignment": 0,
245
245
+
"barWidthFactor": 0.6,
246
246
+
"drawStyle": "line",
247
247
+
"fillOpacity": 0,
248
248
+
"gradientMode": "none",
249
249
+
"hideFrom": {
250
250
+
"legend": false,
251
251
+
"tooltip": false,
252
252
+
"viz": false
253
253
+
},
254
254
+
"insertNulls": false,
255
255
+
"lineInterpolation": "linear",
256
256
+
"lineWidth": 1,
257
257
+
"pointSize": 5,
258
258
+
"scaleDistribution": {
259
259
+
"type": "linear"
260
260
+
},
261
261
+
"showPoints": "auto",
262
262
+
"showValues": false,
263
263
+
"spanNulls": false,
264
264
+
"stacking": {
265
265
+
"group": "A",
266
266
+
"mode": "none"
267
267
+
},
268
268
+
"thresholdsStyle": {
269
269
+
"mode": "off"
270
270
+
}
271
271
+
},
272
272
+
"mappings": [],
273
273
+
"thresholds": {
274
274
+
"mode": "absolute",
275
275
+
"steps": [
276
276
+
{
277
277
+
"color": "green",
278
278
+
"value": 0
279
279
+
},
280
280
+
{
281
281
+
"color": "red",
282
282
+
"value": 1
283
283
+
}
284
284
+
]
285
285
+
},
286
286
+
"unit": "short"
287
287
+
},
288
288
+
"overrides": []
289
289
+
},
290
290
+
"gridPos": {
291
291
+
"h": 8,
292
292
+
"w": 12,
293
293
+
"x": 0,
294
294
+
"y": 9
295
295
+
},
296
296
+
"id": 3,
297
297
+
"options": {
298
298
+
"legend": {
299
299
+
"calcs": ["sum"],
300
300
+
"displayMode": "table",
301
301
+
"placement": "bottom",
302
302
+
"showLegend": true
303
303
+
},
304
304
+
"tooltip": {
305
305
+
"hideZeros": false,
306
306
+
"mode": "multi",
307
307
+
"sort": "none"
308
308
+
}
309
309
+
},
310
310
+
"pluginVersion": "12.3.1",
311
311
+
"targets": [
312
312
+
{
313
313
+
"expr": "rate(http_requests_total{service=~\"weaver-.*\", status=~\"5..\"}[5m])",
314
314
+
"legendFormat": "{{service}} {{status}}",
315
315
+
"refId": "A"
316
316
+
}
317
317
+
],
318
318
+
"title": "Error Rate (5xx)",
319
319
+
"type": "timeseries"
320
320
+
},
321
321
+
{
322
322
+
"datasource": {
323
323
+
"type": "prometheus",
324
324
+
"uid": "${datasource}"
325
325
+
},
326
326
+
"fieldConfig": {
327
327
+
"defaults": {
328
328
+
"color": {
329
329
+
"mode": "thresholds"
330
330
+
},
331
331
+
"mappings": [
332
332
+
{
333
333
+
"options": {
334
334
+
"0": {
335
335
+
"color": "red",
336
336
+
"text": "DOWN"
337
337
+
},
338
338
+
"1": {
339
339
+
"color": "green",
340
340
+
"text": "UP"
341
341
+
}
342
342
+
},
343
343
+
"type": "value"
344
344
+
}
345
345
+
],
346
346
+
"thresholds": {
347
347
+
"mode": "absolute",
348
348
+
"steps": [
349
349
+
{
350
350
+
"color": "red",
351
351
+
"value": 0
352
352
+
},
353
353
+
{
354
354
+
"color": "green",
355
355
+
"value": 1
356
356
+
}
357
357
+
]
358
358
+
}
359
359
+
},
360
360
+
"overrides": []
361
361
+
},
362
362
+
"gridPos": {
363
363
+
"h": 4,
364
364
+
"w": 6,
365
365
+
"x": 12,
366
366
+
"y": 9
367
367
+
},
368
368
+
"id": 4,
369
369
+
"options": {
370
370
+
"colorMode": "background",
371
371
+
"graphMode": "none",
372
372
+
"justifyMode": "auto",
373
373
+
"orientation": "horizontal",
374
374
+
"percentChangeColorMode": "standard",
375
375
+
"reduceOptions": {
376
376
+
"calcs": ["lastNotNull"],
377
377
+
"fields": "",
378
378
+
"values": false
379
379
+
},
380
380
+
"showPercentChange": false,
381
381
+
"textMode": "auto",
382
382
+
"wideLayout": true
383
383
+
},
384
384
+
"pluginVersion": "12.3.1",
385
385
+
"targets": [
386
386
+
{
387
387
+
"expr": "up{service=\"weaver-index\"}",
388
388
+
"legendFormat": "weaver-index",
389
389
+
"refId": "A"
390
390
+
}
391
391
+
],
392
392
+
"title": "Index Status",
393
393
+
"type": "stat"
394
394
+
},
395
395
+
{
396
396
+
"datasource": {
397
397
+
"type": "prometheus",
398
398
+
"uid": "${datasource}"
399
399
+
},
400
400
+
"fieldConfig": {
401
401
+
"defaults": {
402
402
+
"color": {
403
403
+
"mode": "thresholds"
404
404
+
},
405
405
+
"mappings": [
406
406
+
{
407
407
+
"options": {
408
408
+
"0": {
409
409
+
"color": "red",
410
410
+
"text": "DOWN"
411
411
+
},
412
412
+
"1": {
413
413
+
"color": "green",
414
414
+
"text": "UP"
415
415
+
}
416
416
+
},
417
417
+
"type": "value"
418
418
+
}
419
419
+
],
420
420
+
"thresholds": {
421
421
+
"mode": "absolute",
422
422
+
"steps": [
423
423
+
{
424
424
+
"color": "red",
425
425
+
"value": 0
426
426
+
},
427
427
+
{
428
428
+
"color": "green",
429
429
+
"value": 1
430
430
+
}
431
431
+
]
432
432
+
}
433
433
+
},
434
434
+
"overrides": []
435
435
+
},
436
436
+
"gridPos": {
437
437
+
"h": 4,
438
438
+
"w": 6,
439
439
+
"x": 18,
440
440
+
"y": 9
441
441
+
},
442
442
+
"id": 5,
443
443
+
"options": {
444
444
+
"colorMode": "background",
445
445
+
"graphMode": "none",
446
446
+
"justifyMode": "auto",
447
447
+
"orientation": "horizontal",
448
448
+
"percentChangeColorMode": "standard",
449
449
+
"reduceOptions": {
450
450
+
"calcs": ["lastNotNull"],
451
451
+
"fields": "",
452
452
+
"values": false
453
453
+
},
454
454
+
"showPercentChange": false,
455
455
+
"textMode": "auto",
456
456
+
"wideLayout": true
457
457
+
},
458
458
+
"pluginVersion": "12.3.1",
459
459
+
"targets": [
460
460
+
{
461
461
+
"expr": "up{service=\"weaver-app\"}",
462
462
+
"legendFormat": "weaver-app",
463
463
+
"refId": "A"
464
464
+
}
465
465
+
],
466
466
+
"title": "App Status",
467
467
+
"type": "stat"
468
468
+
},
469
469
+
{
470
470
+
"collapsed": false,
471
471
+
"gridPos": {
472
472
+
"h": 1,
473
473
+
"w": 24,
474
474
+
"x": 0,
475
475
+
"y": 17
476
476
+
},
477
477
+
"id": 101,
478
478
+
"panels": [],
479
479
+
"title": "Caddy Proxy",
480
480
+
"type": "row"
481
481
+
},
482
482
+
{
483
483
+
"datasource": {
484
484
+
"type": "prometheus",
485
485
+
"uid": "${datasource}"
486
486
+
},
487
487
+
"fieldConfig": {
488
488
+
"defaults": {
489
489
+
"color": {
490
490
+
"mode": "palette-classic"
491
491
+
},
492
492
+
"custom": {
493
493
+
"axisBorderShow": false,
494
494
+
"axisCenteredZero": false,
495
495
+
"axisColorMode": "text",
496
496
+
"axisLabel": "",
497
497
+
"axisPlacement": "auto",
498
498
+
"barAlignment": 0,
499
499
+
"barWidthFactor": 0.6,
500
500
+
"drawStyle": "line",
501
501
+
"fillOpacity": 0,
502
502
+
"gradientMode": "none",
503
503
+
"hideFrom": {
504
504
+
"legend": false,
505
505
+
"tooltip": false,
506
506
+
"viz": false
507
507
+
},
508
508
+
"insertNulls": false,
509
509
+
"lineInterpolation": "linear",
510
510
+
"lineWidth": 1,
511
511
+
"pointSize": 5,
512
512
+
"scaleDistribution": {
513
513
+
"type": "linear"
514
514
+
},
515
515
+
"showPoints": "auto",
516
516
+
"showValues": false,
517
517
+
"spanNulls": false,
518
518
+
"stacking": {
519
519
+
"group": "A",
520
520
+
"mode": "none"
521
521
+
},
522
522
+
"thresholdsStyle": {
523
523
+
"mode": "off"
524
524
+
}
525
525
+
},
526
526
+
"mappings": [],
527
527
+
"thresholds": {
528
528
+
"mode": "absolute",
529
529
+
"steps": [
530
530
+
{
531
531
+
"color": "green",
532
532
+
"value": 0
533
533
+
},
534
534
+
{
535
535
+
"color": "red",
536
536
+
"value": 80
537
537
+
}
538
538
+
]
539
539
+
},
540
540
+
"unit": "reqps"
541
541
+
},
542
542
+
"overrides": []
543
543
+
},
544
544
+
"gridPos": {
545
545
+
"h": 8,
546
546
+
"w": 12,
547
547
+
"x": 0,
548
548
+
"y": 18
549
549
+
},
550
550
+
"id": 6,
551
551
+
"options": {
552
552
+
"legend": {
553
553
+
"calcs": ["mean", "max"],
554
554
+
"displayMode": "table",
555
555
+
"placement": "bottom",
556
556
+
"showLegend": true
557
557
+
},
558
558
+
"tooltip": {
559
559
+
"hideZeros": false,
560
560
+
"mode": "multi",
561
561
+
"sort": "none"
562
562
+
}
563
563
+
},
564
564
+
"pluginVersion": "12.3.1",
565
565
+
"targets": [
566
566
+
{
567
567
+
"expr": "rate(caddy_admin_http_requests_total{service=\"weaver-caddy\"}[5m])",
568
568
+
"legendFormat": "{{handler}} {{code}}",
569
569
+
"refId": "A"
570
570
+
}
571
571
+
],
572
572
+
"title": "Caddy Request Rate",
573
573
+
"type": "timeseries"
574
574
+
},
575
575
+
{
576
576
+
"datasource": {
577
577
+
"type": "prometheus",
578
578
+
"uid": "${datasource}"
579
579
+
},
580
580
+
"fieldConfig": {
581
581
+
"defaults": {
582
582
+
"color": {
583
583
+
"mode": "palette-classic"
584
584
+
},
585
585
+
"custom": {
586
586
+
"axisBorderShow": false,
587
587
+
"axisCenteredZero": false,
588
588
+
"axisColorMode": "text",
589
589
+
"axisLabel": "",
590
590
+
"axisPlacement": "auto",
591
591
+
"barAlignment": 0,
592
592
+
"barWidthFactor": 0.6,
593
593
+
"drawStyle": "line",
594
594
+
"fillOpacity": 0,
595
595
+
"gradientMode": "none",
596
596
+
"hideFrom": {
597
597
+
"legend": false,
598
598
+
"tooltip": false,
599
599
+
"viz": false
600
600
+
},
601
601
+
"insertNulls": false,
602
602
+
"lineInterpolation": "linear",
603
603
+
"lineWidth": 1,
604
604
+
"pointSize": 5,
605
605
+
"scaleDistribution": {
606
606
+
"type": "linear"
607
607
+
},
608
608
+
"showPoints": "auto",
609
609
+
"showValues": false,
610
610
+
"spanNulls": false,
611
611
+
"stacking": {
612
612
+
"group": "A",
613
613
+
"mode": "none"
614
614
+
},
615
615
+
"thresholdsStyle": {
616
616
+
"mode": "off"
617
617
+
}
618
618
+
},
619
619
+
"mappings": [],
620
620
+
"thresholds": {
621
621
+
"mode": "absolute",
622
622
+
"steps": [
623
623
+
{
624
624
+
"color": "green",
625
625
+
"value": 0
626
626
+
},
627
627
+
{
628
628
+
"color": "red",
629
629
+
"value": 80
630
630
+
}
631
631
+
]
632
632
+
},
633
633
+
"unit": "s"
634
634
+
},
635
635
+
"overrides": []
636
636
+
},
637
637
+
"gridPos": {
638
638
+
"h": 8,
639
639
+
"w": 12,
640
640
+
"x": 12,
641
641
+
"y": 18
642
642
+
},
643
643
+
"id": 7,
644
644
+
"options": {
645
645
+
"legend": {
646
646
+
"calcs": ["mean", "max"],
647
647
+
"displayMode": "table",
648
648
+
"placement": "bottom",
649
649
+
"showLegend": true
650
650
+
},
651
651
+
"tooltip": {
652
652
+
"hideZeros": false,
653
653
+
"mode": "multi",
654
654
+
"sort": "none"
655
655
+
}
656
656
+
},
657
657
+
"pluginVersion": "12.3.1",
658
658
+
"targets": [
659
659
+
{
660
660
+
"expr": "histogram_quantile(0.99, rate(caddy_http_request_duration_seconds_bucket{service=\"weaver-caddy\"}[5m]))",
661
661
+
"legendFormat": "p99",
662
662
+
"refId": "A"
663
663
+
},
664
664
+
{
665
665
+
"expr": "histogram_quantile(0.50, rate(caddy_http_request_duration_seconds_bucket{service=\"weaver-caddy\"}[5m]))",
666
666
+
"legendFormat": "p50",
667
667
+
"refId": "B"
668
668
+
}
669
669
+
],
670
670
+
"title": "Caddy Latency",
671
671
+
"type": "timeseries"
672
672
+
},
673
673
+
{
674
674
+
"collapsed": false,
675
675
+
"gridPos": {
676
676
+
"h": 1,
677
677
+
"w": 24,
678
678
+
"x": 0,
679
679
+
"y": 26
680
680
+
},
681
681
+
"id": 102,
682
682
+
"panels": [],
683
683
+
"title": "Logs",
684
684
+
"type": "row"
685
685
+
},
686
686
+
{
687
687
+
"datasource": {
688
688
+
"type": "loki",
689
689
+
"uid": "${loki}"
690
690
+
},
691
691
+
"fieldConfig": {
692
692
+
"defaults": {},
693
693
+
"overrides": []
694
694
+
},
695
695
+
"gridPos": {
696
696
+
"h": 10,
697
697
+
"w": 12,
698
698
+
"x": 0,
699
699
+
"y": 27
700
700
+
},
701
701
+
"id": 8,
702
702
+
"options": {
703
703
+
"dedupStrategy": "none",
704
704
+
"enableInfiniteScrolling": false,
705
705
+
"enableLogDetails": true,
706
706
+
"prettifyLogMessage": false,
707
707
+
"showCommonLabels": false,
708
708
+
"showControls": false,
709
709
+
"showLabels": false,
710
710
+
"showTime": true,
711
711
+
"sortOrder": "Descending",
712
712
+
"wrapLogMessage": true
713
713
+
},
714
714
+
"pluginVersion": "12.3.1",
715
715
+
"targets": [
716
716
+
{
717
717
+
"expr": "{service_name=\"weaver-index\"} |= ``",
718
718
+
"refId": "A"
719
719
+
}
720
720
+
],
721
721
+
"title": "Index Logs",
722
722
+
"type": "logs"
723
723
+
},
724
724
+
{
725
725
+
"datasource": {
726
726
+
"type": "loki",
727
727
+
"uid": "${loki}"
728
728
+
},
729
729
+
"fieldConfig": {
730
730
+
"defaults": {},
731
731
+
"overrides": []
732
732
+
},
733
733
+
"gridPos": {
734
734
+
"h": 10,
735
735
+
"w": 12,
736
736
+
"x": 12,
737
737
+
"y": 27
738
738
+
},
739
739
+
"id": 9,
740
740
+
"options": {
741
741
+
"dedupStrategy": "none",
742
742
+
"enableInfiniteScrolling": false,
743
743
+
"enableLogDetails": true,
744
744
+
"prettifyLogMessage": false,
745
745
+
"showCommonLabels": false,
746
746
+
"showControls": false,
747
747
+
"showLabels": false,
748
748
+
"showTime": true,
749
749
+
"sortOrder": "Descending",
750
750
+
"wrapLogMessage": true
751
751
+
},
752
752
+
"pluginVersion": "12.3.1",
753
753
+
"targets": [
754
754
+
{
755
755
+
"direction": "backward",
756
756
+
"editorMode": "code",
757
757
+
"expr": "{service=\"weaver-app\"} != `dioxus_core` or `hyper_util` or `dioxus_signals` or `reqwest` or `axum`",
758
758
+
"queryType": "range",
759
759
+
"refId": "A"
760
760
+
}
761
761
+
],
762
762
+
"title": "App Logs",
763
763
+
"type": "logs"
764
764
+
},
765
765
+
{
766
766
+
"datasource": {
767
767
+
"type": "loki",
768
768
+
"uid": "${loki}"
769
769
+
},
770
770
+
"fieldConfig": {
771
771
+
"defaults": {
772
772
+
"color": {
773
773
+
"mode": "palette-classic"
774
774
+
},
775
775
+
"custom": {
776
776
+
"axisBorderShow": false,
777
777
+
"axisCenteredZero": false,
778
778
+
"axisColorMode": "text",
779
779
+
"axisLabel": "",
780
780
+
"axisPlacement": "auto",
781
781
+
"barAlignment": 0,
782
782
+
"barWidthFactor": 0.6,
783
783
+
"drawStyle": "line",
784
784
+
"fillOpacity": 0,
785
785
+
"gradientMode": "none",
786
786
+
"hideFrom": {
787
787
+
"legend": false,
788
788
+
"tooltip": false,
789
789
+
"viz": false
790
790
+
},
791
791
+
"insertNulls": false,
792
792
+
"lineInterpolation": "linear",
793
793
+
"lineWidth": 1,
794
794
+
"pointSize": 5,
795
795
+
"scaleDistribution": {
796
796
+
"type": "linear"
797
797
+
},
798
798
+
"showPoints": "auto",
799
799
+
"showValues": false,
800
800
+
"spanNulls": false,
801
801
+
"stacking": {
802
802
+
"group": "A",
803
803
+
"mode": "none"
804
804
+
},
805
805
+
"thresholdsStyle": {
806
806
+
"mode": "off"
807
807
+
}
808
808
+
},
809
809
+
"mappings": [],
810
810
+
"thresholds": {
811
811
+
"mode": "absolute",
812
812
+
"steps": [
813
813
+
{
814
814
+
"color": "green",
815
815
+
"value": 0
816
816
+
},
817
817
+
{
818
818
+
"color": "red",
819
819
+
"value": 80
820
820
+
}
821
821
+
]
822
822
+
}
823
823
+
},
824
824
+
"overrides": []
825
825
+
},
826
826
+
"gridPos": {
827
827
+
"h": 6,
828
828
+
"w": 24,
829
829
+
"x": 0,
830
830
+
"y": 37
831
831
+
},
832
832
+
"id": 10,
833
833
+
"options": {
834
834
+
"legend": {
835
835
+
"calcs": [],
836
836
+
"displayMode": "list",
837
837
+
"placement": "bottom",
838
838
+
"showLegend": true
839
839
+
},
840
840
+
"tooltip": {
841
841
+
"hideZeros": false,
842
842
+
"mode": "multi",
843
843
+
"sort": "none"
844
844
+
}
845
845
+
},
846
846
+
"pluginVersion": "12.3.1",
847
847
+
"targets": [
848
848
+
{
849
849
+
"expr": "sum by (service_name) (count_over_time({service_name=~\"weaver-.*\"} | level=~\"error|ERROR|err\" [1m]))",
850
850
+
"legendFormat": "{{service_name}}",
851
851
+
"refId": "A"
852
852
+
}
853
853
+
],
854
854
+
"title": "Error Log Rate",
855
855
+
"type": "timeseries"
856
856
+
},
857
857
+
{
858
858
+
"datasource": {
859
859
+
"type": "loki",
860
860
+
"uid": "${loki}"
861
861
+
},
862
862
+
"fieldConfig": {
863
863
+
"defaults": {},
864
864
+
"overrides": []
865
865
+
},
866
866
+
"gridPos": {
867
867
+
"h": 8,
868
868
+
"w": 24,
869
869
+
"x": 0,
870
870
+
"y": 43
871
871
+
},
872
872
+
"id": 11,
873
873
+
"options": {
874
874
+
"dedupStrategy": "none",
875
875
+
"enableInfiniteScrolling": false,
876
876
+
"enableLogDetails": true,
877
877
+
"prettifyLogMessage": false,
878
878
+
"showCommonLabels": false,
879
879
+
"showControls": false,
880
880
+
"showLabels": true,
881
881
+
"showTime": true,
882
882
+
"sortOrder": "Descending",
883
883
+
"wrapLogMessage": true
884
884
+
},
885
885
+
"pluginVersion": "12.3.1",
886
886
+
"targets": [
887
887
+
{
888
888
+
"direction": "backward",
889
889
+
"editorMode": "code",
890
890
+
"expr": "{container_name=~\"weaver-clickhouse|weaver-caddy|weaver-tap\"} |= ``",
891
891
+
"queryType": "range",
892
892
+
"refId": "A"
893
893
+
}
894
894
+
],
895
895
+
"title": "Infrastructure Logs",
896
896
+
"type": "logs"
897
897
+
}
898
898
+
],
899
899
+
"preload": false,
900
900
+
"schemaVersion": 42,
901
901
+
"tags": ["weaver"],
902
902
+
"templating": {
903
903
+
"list": [
904
904
+
{
905
905
+
"current": {
906
906
+
"text": "Prometheus",
907
907
+
"value": "PBFA97CFB590B2093"
908
908
+
},
909
909
+
"includeAll": false,
910
910
+
"label": "Metrics",
911
911
+
"name": "datasource",
912
912
+
"options": [],
913
913
+
"query": "prometheus",
914
914
+
"refresh": 1,
915
915
+
"regex": "",
916
916
+
"type": "datasource"
917
917
+
},
918
918
+
{
919
919
+
"current": {
920
920
+
"text": "Loki",
921
921
+
"value": "P8E80F9AEF21F6940"
922
922
+
},
923
923
+
"includeAll": false,
924
924
+
"label": "Logs",
925
925
+
"name": "loki",
926
926
+
"options": [],
927
927
+
"query": "loki",
928
928
+
"refresh": 1,
929
929
+
"regex": "",
930
930
+
"type": "datasource"
931
931
+
}
932
932
+
]
933
933
+
},
934
934
+
"time": {
935
935
+
"from": "now-1h",
936
936
+
"to": "now"
937
937
+
},
938
938
+
"timepicker": {},
939
939
+
"timezone": "browser",
940
940
+
"title": "Weaver Overview",
941
941
+
"uid": "weaver-overview",
942
942
+
"version": 9
943
943
+
}
+412
infra/grafana/dashboards/weaver-tap.json
···
1
1
+
{
2
2
+
"annotations": { "list": [] },
3
3
+
"editable": true,
4
4
+
"fiscalYearStartMonth": 0,
5
5
+
"graphTooltip": 1,
6
6
+
"links": [],
7
7
+
"panels": [
8
8
+
{
9
9
+
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 },
10
10
+
"id": 100,
11
11
+
"title": "Firehose",
12
12
+
"type": "row"
13
13
+
},
14
14
+
{
15
15
+
"datasource": { "type": "prometheus", "uid": "${datasource}" },
16
16
+
"fieldConfig": {
17
17
+
"defaults": {
18
18
+
"color": { "mode": "palette-classic" },
19
19
+
"unit": "short"
20
20
+
}
21
21
+
},
22
22
+
"gridPos": { "h": 6, "w": 8, "x": 0, "y": 1 },
23
23
+
"id": 1,
24
24
+
"options": {
25
25
+
"legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom" },
26
26
+
"tooltip": { "mode": "multi" }
27
27
+
},
28
28
+
"targets": [
29
29
+
{
30
30
+
"expr": "rate(tap_firehose_events_received_total{service=\"weaver-tap\"}[5m])",
31
31
+
"legendFormat": "Received",
32
32
+
"refId": "A"
33
33
+
},
34
34
+
{
35
35
+
"expr": "rate(tap_firehose_events_processed_total{service=\"weaver-tap\"}[5m])",
36
36
+
"legendFormat": "Processed",
37
37
+
"refId": "B"
38
38
+
},
39
39
+
{
40
40
+
"expr": "rate(tap_firehose_events_skipped_total{service=\"weaver-tap\"}[5m])",
41
41
+
"legendFormat": "Skipped",
42
42
+
"refId": "C"
43
43
+
}
44
44
+
],
45
45
+
"title": "Firehose Event Rate",
46
46
+
"type": "timeseries"
47
47
+
},
48
48
+
{
49
49
+
"datasource": { "type": "prometheus", "uid": "${datasource}" },
50
50
+
"fieldConfig": {
51
51
+
"defaults": {
52
52
+
"color": { "mode": "thresholds" },
53
53
+
"thresholds": {
54
54
+
"mode": "absolute",
55
55
+
"steps": [
56
56
+
{ "color": "green", "value": null }
57
57
+
]
58
58
+
},
59
59
+
"unit": "none"
60
60
+
}
61
61
+
},
62
62
+
"gridPos": { "h": 6, "w": 4, "x": 8, "y": 1 },
63
63
+
"id": 2,
64
64
+
"options": {
65
65
+
"colorMode": "value",
66
66
+
"graphMode": "area",
67
67
+
"justifyMode": "auto",
68
68
+
"orientation": "auto",
69
69
+
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }
70
70
+
},
71
71
+
"targets": [
72
72
+
{
73
73
+
"expr": "tap_firehose_last_seq{service=\"weaver-tap\"}",
74
74
+
"legendFormat": "Last Seq",
75
75
+
"refId": "A"
76
76
+
}
77
77
+
],
78
78
+
"title": "Last Sequence",
79
79
+
"type": "stat"
80
80
+
},
81
81
+
{
82
82
+
"datasource": { "type": "prometheus", "uid": "${datasource}" },
83
83
+
"fieldConfig": {
84
84
+
"defaults": {
85
85
+
"color": { "mode": "thresholds" },
86
86
+
"thresholds": {
87
87
+
"mode": "absolute",
88
88
+
"steps": [
89
89
+
{ "color": "green", "value": null },
90
90
+
{ "color": "yellow", "value": 10000 },
91
91
+
{ "color": "red", "value": 50000 }
92
92
+
]
93
93
+
},
94
94
+
"unit": "short"
95
95
+
}
96
96
+
},
97
97
+
"gridPos": { "h": 6, "w": 4, "x": 12, "y": 1 },
98
98
+
"id": 3,
99
99
+
"options": {
100
100
+
"colorMode": "value",
101
101
+
"graphMode": "area",
102
102
+
"justifyMode": "auto",
103
103
+
"orientation": "auto",
104
104
+
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }
105
105
+
},
106
106
+
"targets": [
107
107
+
{
108
108
+
"expr": "tap_event_cache_size{service=\"weaver-tap\"}",
109
109
+
"legendFormat": "Cache Size",
110
110
+
"refId": "A"
111
111
+
}
112
112
+
],
113
113
+
"title": "Event Cache Size",
114
114
+
"type": "stat"
115
115
+
},
116
116
+
{
117
117
+
"datasource": { "type": "prometheus", "uid": "${datasource}" },
118
118
+
"fieldConfig": {
119
119
+
"defaults": {
120
120
+
"color": { "mode": "palette-classic" },
121
121
+
"unit": "short"
122
122
+
}
123
123
+
},
124
124
+
"gridPos": { "h": 6, "w": 8, "x": 16, "y": 1 },
125
125
+
"id": 4,
126
126
+
"options": {
127
127
+
"legend": { "calcs": ["sum"], "displayMode": "table", "placement": "bottom" },
128
128
+
"tooltip": { "mode": "multi" }
129
129
+
},
130
130
+
"targets": [
131
131
+
{
132
132
+
"expr": "increase(tap_firehose_events_received_total{service=\"weaver-tap\"}[1h])",
133
133
+
"legendFormat": "Received",
134
134
+
"refId": "A"
135
135
+
},
136
136
+
{
137
137
+
"expr": "increase(tap_firehose_events_processed_total{service=\"weaver-tap\"}[1h])",
138
138
+
"legendFormat": "Processed",
139
139
+
"refId": "B"
140
140
+
}
141
141
+
],
142
142
+
"title": "Events (1h)",
143
143
+
"type": "timeseries"
144
144
+
},
145
145
+
{
146
146
+
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 7 },
147
147
+
"id": 101,
148
148
+
"title": "Resyncs",
149
149
+
"type": "row"
150
150
+
},
151
151
+
{
152
152
+
"datasource": { "type": "prometheus", "uid": "${datasource}" },
153
153
+
"fieldConfig": {
154
154
+
"defaults": {
155
155
+
"color": { "mode": "palette-classic" },
156
156
+
"unit": "short"
157
157
+
}
158
158
+
},
159
159
+
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 8 },
160
160
+
"id": 5,
161
161
+
"options": {
162
162
+
"legend": { "calcs": ["sum"], "displayMode": "table", "placement": "bottom" },
163
163
+
"tooltip": { "mode": "multi" }
164
164
+
},
165
165
+
"targets": [
166
166
+
{
167
167
+
"expr": "rate(tap_resyncs_started_total{service=\"weaver-tap\"}[5m])",
168
168
+
"legendFormat": "Started",
169
169
+
"refId": "A"
170
170
+
},
171
171
+
{
172
172
+
"expr": "rate(tap_resyncs_completed_total{service=\"weaver-tap\"}[5m])",
173
173
+
"legendFormat": "Completed",
174
174
+
"refId": "B"
175
175
+
},
176
176
+
{
177
177
+
"expr": "rate(tap_resyncs_failed_total{service=\"weaver-tap\"}[5m])",
178
178
+
"legendFormat": "Failed",
179
179
+
"refId": "C"
180
180
+
}
181
181
+
],
182
182
+
"title": "Resync Rate",
183
183
+
"type": "timeseries"
184
184
+
},
185
185
+
{
186
186
+
"datasource": { "type": "prometheus", "uid": "${datasource}" },
187
187
+
"fieldConfig": {
188
188
+
"defaults": {
189
189
+
"color": { "mode": "palette-classic" },
190
190
+
"unit": "s"
191
191
+
}
192
192
+
},
193
193
+
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 8 },
194
194
+
"id": 6,
195
195
+
"options": {
196
196
+
"legend": { "calcs": ["mean", "max", "p99"], "displayMode": "table", "placement": "bottom" },
197
197
+
"tooltip": { "mode": "multi" }
198
198
+
},
199
199
+
"targets": [
200
200
+
{
201
201
+
"expr": "histogram_quantile(0.99, rate(tap_resync_duration_seconds_bucket{service=\"weaver-tap\"}[5m]))",
202
202
+
"legendFormat": "p99",
203
203
+
"refId": "A"
204
204
+
},
205
205
+
{
206
206
+
"expr": "histogram_quantile(0.50, rate(tap_resync_duration_seconds_bucket{service=\"weaver-tap\"}[5m]))",
207
207
+
"legendFormat": "p50",
208
208
+
"refId": "B"
209
209
+
}
210
210
+
],
211
211
+
"title": "Resync Duration",
212
212
+
"type": "timeseries"
213
213
+
},
214
214
+
{
215
215
+
"datasource": { "type": "prometheus", "uid": "${datasource}" },
216
216
+
"fieldConfig": {
217
217
+
"defaults": {
218
218
+
"color": { "mode": "thresholds" },
219
219
+
"thresholds": {
220
220
+
"mode": "absolute",
221
221
+
"steps": [
222
222
+
{ "color": "green", "value": null }
223
223
+
]
224
224
+
},
225
225
+
"unit": "short"
226
226
+
}
227
227
+
},
228
228
+
"gridPos": { "h": 4, "w": 4, "x": 0, "y": 16 },
229
229
+
"id": 7,
230
230
+
"options": {
231
231
+
"colorMode": "value",
232
232
+
"graphMode": "none",
233
233
+
"justifyMode": "auto",
234
234
+
"orientation": "auto",
235
235
+
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }
236
236
+
},
237
237
+
"targets": [
238
238
+
{
239
239
+
"expr": "tap_resyncs_completed_total{service=\"weaver-tap\"}",
240
240
+
"legendFormat": "Completed",
241
241
+
"refId": "A"
242
242
+
}
243
243
+
],
244
244
+
"title": "Total Completed",
245
245
+
"type": "stat"
246
246
+
},
247
247
+
{
248
248
+
"datasource": { "type": "prometheus", "uid": "${datasource}" },
249
249
+
"fieldConfig": {
250
250
+
"defaults": {
251
251
+
"color": { "mode": "thresholds" },
252
252
+
"thresholds": {
253
253
+
"mode": "absolute",
254
254
+
"steps": [
255
255
+
{ "color": "green", "value": null },
256
256
+
{ "color": "red", "value": 1 }
257
257
+
]
258
258
+
},
259
259
+
"unit": "short"
260
260
+
}
261
261
+
},
262
262
+
"gridPos": { "h": 4, "w": 4, "x": 4, "y": 16 },
263
263
+
"id": 8,
264
264
+
"options": {
265
265
+
"colorMode": "value",
266
266
+
"graphMode": "none",
267
267
+
"justifyMode": "auto",
268
268
+
"orientation": "auto",
269
269
+
"reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }
270
270
+
},
271
271
+
"targets": [
272
272
+
{
273
273
+
"expr": "tap_resyncs_failed_total{service=\"weaver-tap\"}",
274
274
+
"legendFormat": "Failed",
275
275
+
"refId": "A"
276
276
+
}
277
277
+
],
278
278
+
"title": "Total Failed",
279
279
+
"type": "stat"
280
280
+
},
281
281
+
{
282
282
+
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 20 },
283
283
+
"id": 102,
284
284
+
"title": "Event Delivery",
285
285
+
"type": "row"
286
286
+
},
287
287
+
{
288
288
+
"datasource": { "type": "prometheus", "uid": "${datasource}" },
289
289
+
"fieldConfig": {
290
290
+
"defaults": {
291
291
+
"color": { "mode": "palette-classic" },
292
292
+
"unit": "short"
293
293
+
}
294
294
+
},
295
295
+
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 21 },
296
296
+
"id": 9,
297
297
+
"options": {
298
298
+
"legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom" },
299
299
+
"tooltip": { "mode": "multi" }
300
300
+
},
301
301
+
"targets": [
302
302
+
{
303
303
+
"expr": "rate(tap_events_delivered_total{service=\"weaver-tap\"}[5m])",
304
304
+
"legendFormat": "Delivered",
305
305
+
"refId": "A"
306
306
+
},
307
307
+
{
308
308
+
"expr": "rate(tap_events_acked_total{service=\"weaver-tap\"}[5m])",
309
309
+
"legendFormat": "Acked",
310
310
+
"refId": "B"
311
311
+
}
312
312
+
],
313
313
+
"title": "Event Delivery Rate",
314
314
+
"type": "timeseries"
315
315
+
},
316
316
+
{
317
317
+
"datasource": { "type": "prometheus", "uid": "${datasource}" },
318
318
+
"fieldConfig": {
319
319
+
"defaults": {
320
320
+
"color": { "mode": "palette-classic" },
321
321
+
"unit": "short"
322
322
+
}
323
323
+
},
324
324
+
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 21 },
325
325
+
"id": 10,
326
326
+
"options": {
327
327
+
"legend": { "calcs": ["mean"], "displayMode": "table", "placement": "bottom" },
328
328
+
"tooltip": { "mode": "multi" }
329
329
+
},
330
330
+
"targets": [
331
331
+
{
332
332
+
"expr": "rate(tap_crawler_repos_discovered_total{service=\"weaver-tap\"}[5m])",
333
333
+
"legendFormat": "Repos Discovered",
334
334
+
"refId": "A"
335
335
+
}
336
336
+
],
337
337
+
"title": "Crawler Discovery Rate",
338
338
+
"type": "timeseries"
339
339
+
},
340
340
+
{
341
341
+
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 29 },
342
342
+
"id": 103,
343
343
+
"title": "Logs",
344
344
+
"type": "row"
345
345
+
},
346
346
+
{
347
347
+
"datasource": { "type": "loki", "uid": "${loki}" },
348
348
+
"gridPos": { "h": 10, "w": 24, "x": 0, "y": 30 },
349
349
+
"id": 11,
350
350
+
"options": {
351
351
+
"dedupStrategy": "none",
352
352
+
"enableLogDetails": true,
353
353
+
"prettifyLogMessage": false,
354
354
+
"showCommonLabels": false,
355
355
+
"showLabels": true,
356
356
+
"showTime": true,
357
357
+
"sortOrder": "Descending",
358
358
+
"wrapLogMessage": true
359
359
+
},
360
360
+
"targets": [
361
361
+
{
362
362
+
"expr": "{container_name=\"weaver-tap\"} |= ``",
363
363
+
"refId": "A"
364
364
+
}
365
365
+
],
366
366
+
"title": "Tap Logs",
367
367
+
"type": "logs"
368
368
+
}
369
369
+
],
370
370
+
"schemaVersion": 39,
371
371
+
"tags": ["weaver", "tap", "atproto"],
372
372
+
"templating": {
373
373
+
"list": [
374
374
+
{
375
375
+
"current": { "selected": false, "text": "Prometheus", "value": "Prometheus" },
376
376
+
"hide": 0,
377
377
+
"includeAll": false,
378
378
+
"label": "Metrics",
379
379
+
"multi": false,
380
380
+
"name": "datasource",
381
381
+
"options": [],
382
382
+
"query": "prometheus",
383
383
+
"queryValue": "",
384
384
+
"refresh": 1,
385
385
+
"regex": "",
386
386
+
"skipUrlSync": false,
387
387
+
"type": "datasource"
388
388
+
},
389
389
+
{
390
390
+
"current": { "selected": false, "text": "Loki", "value": "Loki" },
391
391
+
"hide": 0,
392
392
+
"includeAll": false,
393
393
+
"label": "Logs",
394
394
+
"multi": false,
395
395
+
"name": "loki",
396
396
+
"options": [],
397
397
+
"query": "loki",
398
398
+
"queryValue": "",
399
399
+
"refresh": 1,
400
400
+
"regex": "",
401
401
+
"skipUrlSync": false,
402
402
+
"type": "datasource"
403
403
+
}
404
404
+
]
405
405
+
},
406
406
+
"time": { "from": "now-1h", "to": "now" },
407
407
+
"timepicker": {},
408
408
+
"timezone": "browser",
409
409
+
"title": "Weaver Tap",
410
410
+
"uid": "weaver-tap",
411
411
+
"version": 1
412
412
+
}
+44
infra/promtail/config.yml
···
1
1
+
server:
2
2
+
http_listen_port: 9080
3
3
+
grpc_listen_port: 0
4
4
+
5
5
+
positions:
6
6
+
filename: /tmp/positions.yaml
7
7
+
8
8
+
clients:
9
9
+
- url: ${LOKI_URL}/loki/api/v1/push
10
10
+
11
11
+
scrape_configs:
12
12
+
- job_name: docker
13
13
+
docker_sd_configs:
14
14
+
- host: unix:///var/run/docker.sock
15
15
+
refresh_interval: 5s
16
16
+
relabel_configs:
17
17
+
# Only scrape weaver containers
18
18
+
- source_labels: [__meta_docker_container_name]
19
19
+
regex: "/(weaver-.+)"
20
20
+
action: keep
21
21
+
# Set container_name label (matches dashboard queries)
22
22
+
- source_labels: [__meta_docker_container_name]
23
23
+
regex: "/(.+)"
24
24
+
target_label: container_name
25
25
+
# Add instance label
26
26
+
- target_label: instance
27
27
+
replacement: "booskie-box"
28
28
+
pipeline_stages:
29
29
+
# Parse JSON logs if present
30
30
+
- json:
31
31
+
expressions:
32
32
+
level: level
33
33
+
msg: msg
34
34
+
timestamp: timestamp
35
35
+
# Use extracted level if available
36
36
+
- labels:
37
37
+
level:
38
38
+
# Timestamp from log if available
39
39
+
- timestamp:
40
40
+
source: timestamp
41
41
+
format: RFC3339Nano
42
42
+
fallback_formats:
43
43
+
- RFC3339
44
44
+
- UnixMs