my own status page
1import type { Env } from "./types";
2import { getManifest } from "./manifest";
3import { checkHealth } from "./health";
4import { insertPing, getLatestPing, pruneOldPings, createIncident, updateIncident, addIncidentUpdate, getActiveIncidentForService, getActiveIncidents, getRecentlyResolvedIncident, getRecentlyResolvedIncidents, setIncidentGitHub } from "./db";
5import { refreshDevices } from "./tailscale";
6import { handleStatusRoute } from "./routes/status";
7import { handleFavicon } from "./routes/favicon";
8import { handleUptime } from "./routes/uptime";
9import { handleBadgeRoute } from "./routes/badge";
10import { handleIndex } from "./routes/index";
11import { handleIncidentRoute } from "./routes/incidents";
12import { createIssue, assignIssue, commentOnIssue, closeIssue, parseRepo, syncGitHubIncidents } from "./github";
13import { schemas } from "./schemas";
14
/**
 * Routes an incoming HTTP request to the matching handler.
 *
 * Routes, checked in this order:
 * - `/` (or an empty path)      -> index page
 * - `/favicon.svg`              -> favicon
 * - `/health`                   -> `{ ok: true, timestamp }` liveness payload
 * - `/api/schemas`              -> every registered schema
 * - `/api/schemas/<name>`       -> one schema, or a 404 JSON error
 * - `/api/status...`            -> status routes
 * - `/api/uptime/<id>`          -> uptime for one service
 * - `/badge...`                 -> badge routes
 * - `/api/incidents...`         -> incident routes
 *
 * The prefix-matched handlers (status, badge, incidents) may return a falsy
 * value; the request then keeps falling through and ends in the 404 below.
 *
 * @param request - The incoming request.
 * @param env - Worker bindings, passed through to the route handlers.
 * @returns The response of the first matching route, or a plain 404.
 */
async function handleRequest(request: Request, env: Env): Promise<Response> {
  const url = new URL(request.url);
  const path = url.pathname;

  if (path === "/" || path === "") {
    return handleIndex(env);
  }

  if (path === "/favicon.svg") {
    return handleFavicon(env);
  }

  if (path === "/health") {
    return Response.json({ ok: true, timestamp: new Date().toISOString() });
  }

  if (path === "/api/schemas") {
    return Response.json(schemas);
  }

  const schemaMatch = path.match(/^\/api\/schemas\/(.+)$/);
  if (schemaMatch) {
    const schemaName = schemaMatch[1];
    // Only own properties count: a plain `schemas[name]` lookup would also
    // resolve inherited members such as "constructor" or "__proto__", which
    // are not schemas and either fail to serialise or leak an empty object.
    const schema =
      schemaName !== undefined && Object.prototype.hasOwnProperty.call(schemas, schemaName)
        ? schemas[schemaName]
        : undefined;
    if (schema) {
      return Response.json(schema);
    }
    return Response.json({ error: "schema not found" }, { status: 404 });
  }

  if (path.startsWith("/api/status")) {
    const res = await handleStatusRoute(env, path);
    if (res) return res;
  }

  const uptimeMatch = path.match(/^\/api\/uptime\/(.+)$/);
  if (uptimeMatch) {
    return handleUptime(env, uptimeMatch[1], url);
  }

  if (path.startsWith("/badge")) {
    const badge = await handleBadgeRoute(env, path, url);
    if (badge) return badge;
  }

  if (path.startsWith("/api/incidents")) {
    const res = await handleIncidentRoute(request, env, path);
    if (res) return res;
  }

  return new Response("Not Found", { status: 404 });
}
66
/**
 * Worker entry points: `fetch` serves HTTP traffic with permissive CORS
 * headers, and `scheduled` runs the periodic health checks and the incident
 * bookkeeping that follows from them.
 */
export default {
  /**
   * HTTP entry point.
   *
   * OPTIONS requests are answered directly with the CORS preflight headers.
   * Every other request goes through `handleRequest`, and the result is
   * returned with `Access-Control-Allow-Origin: *` set.
   */
  async fetch(request: Request, env: Env): Promise<Response> {
    if (request.method === "OPTIONS") {
      return new Response(null, {
        headers: {
          "Access-Control-Allow-Origin": "*",
          "Access-Control-Allow-Methods": "GET, POST, PATCH, OPTIONS",
          "Access-Control-Allow-Headers": "Content-Type, Authorization",
        },
      });
    }

    const response = await handleRequest(request, env);
    // Copy the response so the CORS header is set on the copy.
    const corsResponse = new Response(response.body, response);
    corsResponse.headers.set("Access-Control-Allow-Origin", "*");
    return corsResponse;
  },

  /**
   * Cron entry point. For every service in the manifest that has a
   * `health_url` it:
   *
   * 1. runs the health check and records the ping;
   * 2. when the service is down or timing out, bumps a failure counter in KV
   *    and, from the second consecutive failure on, opens an incident (plus
   *    an optional GitHub issue and triage webhook) unless one is already
   *    active or one was resolved within the last 900 seconds;
   * 3. otherwise clears the failure counter and resolves any active incident,
   *    commenting on and closing its GitHub issue.
   *
   * Afterwards it prunes old pings and, when a GitHub token is configured,
   * syncs active and recently resolved incidents with GitHub.
   *
   * @param _controller - Unused.
   * @param env - Worker bindings (DB, KV, tokens).
   * @param ctx - Optional execution context. When supplied, best-effort
   *   background requests are registered with `waitUntil` so they are not
   *   abandoned when the handler returns.
   */
  async scheduled(
    _controller: ScheduledController,
    env: Env,
    ctx?: { waitUntil(promise: Promise<unknown>): void },
  ): Promise<void> {
    // Runs a best-effort task without blocking the caller. Failures are
    // logged rather than propagated.
    const inBackground = (label: string, task: Promise<unknown>): void => {
      const settled = task.catch((err: unknown) => {
        console.error(`${label} failed:`, err);
      });
      ctx?.waitUntil(settled);
    };

    const [manifest] = await Promise.all([
      getManifest(env),
      refreshDevices(env),
    ]);

    const checks = Object.values(manifest).flatMap((machine) => {
      const triageUrl = machine.triage_url;
      return machine.services
        .filter((svc) => svc.health_url)
        .map(async (svc) => {
          const result = await checkHealth(svc);
          await insertPing(env.DB, svc.name, result.status, result.latency_ms);

          const failKey = `triage:${svc.name}:failures`;
          const isDown = result.status === "down" || result.status === "timeout";

          if (!isDown) {
            // Service is up — clear the failure counter (only if one exists,
            // to avoid unnecessary KV delete ops).
            if (await env.KV.get(failKey)) {
              await env.KV.delete(failKey);
            }

            // Auto-resolve an active incident, if there is one.
            const active = await getActiveIncidentForService(env.DB, svc.name);
            if (!active) return;

            await updateIncident(env.DB, active.id, {
              status: "resolved",
              resolved_at: Math.floor(Date.now() / 1000),
            });
            await addIncidentUpdate(env.DB, active.id, "resolved", "Service recovered automatically");

            // Close the linked GitHub issue.
            const token = env.GITHUB_TOKEN;
            const issueNumber = active.github_issue_number;
            if (token && active.github_repo && issueNumber) {
              const parsed = parseRepo(`https://github.com/${active.github_repo}`);
              if (parsed) {
                inBackground(
                  `closing GitHub issue for ${svc.name}`,
                  (async () => {
                    // Comment first so it lands before the issue is closed;
                    // a failed comment must not prevent the close.
                    try {
                      await commentOnIssue(token, parsed.owner, parsed.repo, issueNumber, "Service recovered automatically. Closing issue.");
                    } catch (err: unknown) {
                      console.error(`commenting on GitHub issue for ${svc.name} failed:`, err);
                    }
                    await closeIssue(token, parsed.owner, parsed.repo, issueNumber);
                  })(),
                );
              }
            }
            return;
          }

          // Track consecutive failures in KV for flap prevention. The counter
          // expires after 1800 seconds. An unparsable stored value is treated
          // as 0 so a corrupt entry cannot block incident creation.
          const stored = Number.parseInt((await env.KV.get(failKey)) ?? "0", 10);
          const failures = (Number.isNaN(stored) ? 0 : stored) + 1;
          await env.KV.put(failKey, String(failures), { expirationTtl: 1800 });

          // Only trigger after 2 consecutive failures (about 10 min of
          // downtime, presumably with a 5-minute cron — the schedule is not
          // visible in this file).
          if (failures < 2) return;

          // An incident is already open for this service.
          if (await getActiveIncidentForService(env.DB, svc.name)) return;

          // Cooldown: skip if an incident was resolved in the last 15 min.
          if (await getRecentlyResolvedIncident(env.DB, svc.name, 900)) return;

          const title = `${svc.name} is ${result.status}`;
          const id = await createIncident(env.DB, {
            service_id: svc.name,
            title,
            severity: "major",
          });

          // Create a GitHub issue on the service's repo.
          if (env.GITHUB_TOKEN && svc.repository) {
            const parsed = parseRepo(svc.repository);
            if (parsed) {
              try {
                const issueNumber = await createIssue(env.GITHUB_TOKEN, parsed.owner, parsed.repo, {
                  title,
                  body: `Automated incident detected by [infra.dunkirk.sh](https://infra.dunkirk.sh)\n\n**Service:** ${svc.name}\n**Health URL:** ${svc.health_url}\n**Status:** ${result.status}${result.status_code ? ` (HTTP ${result.status_code})` : ""}${result.error ? ` — ${result.error}` : ""}\n**Latency:** ${result.latency_ms}ms\n**Detected at:** ${new Date().toISOString()}\n\n---\n*Comments on this issue will appear on the status page. Close the issue to resolve the incident.*`,
                  labels: ["incident"],
                });
                if (env.GITHUB_ASSIGN_TOKEN && env.GITHUB_ASSIGNEE) {
                  await assignIssue(env.GITHUB_ASSIGN_TOKEN, parsed.owner, parsed.repo, issueNumber, [env.GITHUB_ASSIGNEE]);
                }
                await setIncidentGitHub(env.DB, id, `${parsed.owner}/${parsed.repo}`, issueNumber);
              } catch (err: unknown) {
                // Best effort: the incident exists regardless, so log and
                // carry on instead of failing the whole check.
                console.error(`opening GitHub issue for ${svc.name} failed:`, err);
              }
            }
          }

          // Fire webhook to triage agent (non-blocking).
          if (triageUrl && env.TRIAGE_AUTH_TOKEN) {
            inBackground(
              `triage webhook for ${svc.name}`,
              fetch(triageUrl, {
                method: "POST",
                headers: {
                  "Content-Type": "application/json",
                  Authorization: `Bearer ${env.TRIAGE_AUTH_TOKEN}`,
                },
                body: JSON.stringify({
                  incident_id: id,
                  service_id: svc.name,
                  service_name: svc.name,
                  health_url: svc.health_url,
                  callback_url: `https://infra.dunkirk.sh/api/incidents/${id}`,
                }),
              }),
            );
          }
        });
    });

    // One failing service check must not stop the others, nor the pruning
    // and GitHub sync below, so collect every outcome and log the failures.
    const outcomes = await Promise.allSettled(checks);
    for (const outcome of outcomes) {
      if (outcome.status === "rejected") {
        console.error("service health check failed:", outcome.reason);
      }
    }

    await pruneOldPings(env.DB, 365);

    // Sync GitHub issue comments/state back to incidents.
    if (env.GITHUB_TOKEN) {
      const active = await getActiveIncidents(env.DB);
      const recentlyResolved = await getRecentlyResolvedIncidents(env.DB, 86400 * 7);
      const toSync = [...active, ...recentlyResolved];
      await syncGitHubIncidents(env.DB, env.KV, env.GITHUB_TOKEN, toSync);
    }
  },
} satisfies ExportedHandler<Env>;