// Openstatus — www.openstatus.dev
1import { AsyncLocalStorage } from "node:async_hooks";
2// import * as Sentry from "@sentry/node";
3import { sentry } from "@hono/sentry";
4import {
5 configure,
6 getConsoleSink,
7 getLogger,
8 jsonLinesFormatter,
9 withContext,
10} from "@logtape/logtape";
11import { getOpenTelemetrySink } from "@logtape/otel";
12
13// import { getSentrySink } from "@logtape/sentry";
14import { Hono } from "hono";
15import { showRoutes } from "hono/dev";
16import { requestId } from "hono/request-id";
17// import { logger } from "hono/logger";
18import { checkerRoute } from "./checker";
19import { cronRouter } from "./cron";
20import { env } from "./env";
21
22import { resourceFromAttributes } from "@opentelemetry/resources";
23import { ATTR_DEPLOYMENT_ENVIRONMENT_NAME } from "@opentelemetry/semantic-conventions/incubating";
24import { incidentRoute } from "./incident";
25
// Resolve the environment once at module load and keep the two values that
// the module-level code below needs.
const runtimeEnv = env();
const NODE_ENV = runtimeEnv.NODE_ENV;
const PORT = runtimeEnv.PORT;
27
/** Free-form bag of key/value pairs accumulated for a single request. */
type WideEvent = Record<string, unknown>;

/**
 * Hono environment for this app.
 *
 * `Variables.event` is the per-request record that the request middleware
 * stores on the context via `c.set("event", ...)`.
 */
export type Env = {
  Variables: {
    event: WideEvent;
  };
};
33
/** Requests that took longer than this many milliseconds are always kept. */
const SLOW_REQUEST_THRESHOLD_MS = 2000;

/** Probability of keeping an event whose response was a client error (4xx). */
const CLIENT_ERROR_SAMPLE_RATE = 0.5;

/** Probability of keeping an event for a successful, fast request. */
const SUCCESS_SAMPLE_RATE = 0.2;

/**
 * Tail sampling strategy based on loggingsucks.com best practices.
 *
 * The decision is made after the request has completed, from the fields
 * recorded on the event:
 * - server errors (`status_code` >= 500) are always kept
 * - events carrying a truthy `error` are always kept
 * - slow requests (`duration_ms` > 2000) are always kept
 * - client errors (4xx) are kept 50% of the time
 * - everything else is kept 20% of the time
 *
 * @param event - Request event; reads `status_code`, `duration_ms` and `error`.
 * @returns `true` when the event should be emitted.
 */
function shouldSample(event: Record<string, unknown>): boolean {
  const statusCode = event.status_code as number | undefined;
  const durationMs = event.duration_ms as number | undefined;

  // Always capture: server errors
  if (statusCode && statusCode >= 500) return true;

  // Always capture: explicit errors
  if (event.error) return true;

  // Always capture: slow requests
  if (durationMs && durationMs > SLOW_REQUEST_THRESHOLD_MS) return true;

  // Client errors are sampled at a higher rate than successful requests.
  // Previously this branch returned `true` unconditionally, which contradicted
  // the documented 50% rate and kept every 4xx event.
  const isClientError =
    statusCode !== undefined && statusCode >= 400 && statusCode < 500;
  const sampleRate = isClientError
    ? CLIENT_ERROR_SAMPLE_RATE
    : SUCCESS_SAMPLE_RATE;

  return Math.random() < sampleRate;
}
63
// Request headers for the OTLP log exporter; token and dataset come from env().
const axiomHeaders = {
  Authorization: `Bearer ${env().AXIOM_TOKEN}`,
  "X-Axiom-Dataset": env().AXIOM_DATASET,
};

// Extra resource attributes attached to the sink: the deployment environment name.
const deploymentResource = resourceFromAttributes({
  [ATTR_DEPLOYMENT_ENVIRONMENT_NAME]: env().NODE_ENV,
});

// OpenTelemetry sink for LogTape, exporting to the Axiom OTLP logs endpoint.
// Note: despite the name this is a sink, not a logger; it is registered as the
// "otel" sink in the configure() call.
const defaultLogger = getOpenTelemetrySink({
  serviceName: "openstatus-workflows",
  otlpExporterConfig: {
    url: "https://eu-central-1.aws.edge.axiom.co/v1/logs",
    headers: axiomHeaders,
  },
  additionalResource: deploymentResource,
});
77
// Set up LogTape before any logger is used (top-level await).
// - "workflow"      -> console sink as JSON lines, debug and above
// - "workflow-otel" -> OpenTelemetry sink, info and above
// The config literal is kept inline so sink ids and level literals are checked
// against each other by configure()'s own typing.
await configure({
  // NOTE(review): presumably required for withContext() to propagate
  // properties across awaits — confirm against the LogTape docs.
  contextLocalStorage: new AsyncLocalStorage(),
  sinks: {
    console: getConsoleSink({ formatter: jsonLinesFormatter }),
    // sentry: getSentrySink(),
    otel: defaultLogger,
  },
  loggers: [
    {
      sinks: ["console"],
      lowestLevel: "debug",
      category: "workflow",
    },
    {
      sinks: ["otel"],
      lowestLevel: "info",
      category: "workflow-otel",
    },
  ],
});

// Console logger for local diagnostics.
const logger = getLogger("workflow");
// Logger whose records go to the OpenTelemetry sink.
const otelLogger = getLogger("workflow-otel");
101
// Application instance; `strict: false` is passed through to Hono as-is.
const app = new Hono<Env>({ strict: false });

// Global middleware, registered in order: request id first, then Sentry.
const requestIdMiddleware = requestId();
app.use("*", requestIdMiddleware);

const sentryMiddleware = sentry({ dsn: env().SENTRY_DSN });
app.use("*", sentryMiddleware);
107
/**
 * Request middleware that builds one event record per request.
 *
 * Before the downstream handlers run it stores an `event` record on the
 * context and fills in request details. After they finish it adds the status
 * code, outcome, error details (when `c.error` is set) and duration, emits the
 * record through `otelLogger` if `shouldSample` accepts it, and always writes
 * a short debug line through `logger`. Everything from filling in the request
 * details onwards runs inside `withContext`, with the request id, method, URL
 * and user agent as context.
 */
app.use("*", async (c, next) => {
  const reqId = c.get("requestId");
  const startedAt = Date.now();

  const event: Record<string, unknown> = {
    timestamp: new Date().toISOString(),
  };
  // Expose the record on the context so it can be read via c.get("event").
  c.set("event", event);

  const logContext = {
    request_id: reqId,
    method: c.req.method,
    url: c.req.url,
    user_agent: c.req.header("User-Agent"),
    // ipAddress: c.req.header("CF-Connecting-IP") || c.req.header("X-Forwarded-For")
  };

  const runRequest = async () => {
    // Request-side fields, recorded before the handlers run.
    Object.assign(event, {
      request_id: reqId,
      method: c.req.method,
      path: c.req.path,
      url: c.req.url,
      user_agent: c.req.header("User-Agent"),
      content_type: c.req.header("Content-Type"),
      cf_ray: c.req.header("CF-Ray"),
      cf_connecting_ip: c.req.header("CF-Connecting-IP"),
    });

    await next();

    const elapsedMs = Date.now() - startedAt;

    // Response-side fields, recorded once the handlers have finished.
    event.status_code = c.res.status;
    const failure = c.error;
    event.outcome = failure ? "error" : "success";
    if (failure) {
      event.error = {
        type: failure.name,
        message: failure.message,
        stack: failure.stack,
      };
    }
    event.duration_ms = elapsedMs;

    // One log line per request carrying the whole record, subject to sampling.
    if (shouldSample(event)) {
      otelLogger.info("request", event);
    }

    logger.debug("Request completed", {
      status_code: c.res.status,
      duration_ms: elapsedMs,
      request_id: reqId,
    });
  };

  await withContext(logContext, runRequest);
});
164
/**
 * Application error handler.
 *
 * Logs the error together with the request method, path, URL and request id,
 * forwards the error to the Sentry client stored on the context, and answers
 * with a JSON 500 response.
 */
app.onError((err, c) => {
  const { name, message, stack } = err;
  const { method, path, url } = c.req;

  logger.error("Unhandled request error", {
    error_name: name,
    error_message: message,
    error_stack: stack,
    method,
    path,
    url,
    request_id: c.get("requestId"),
  });

  const sentryClient = c.get("sentry");
  sentryClient.captureException(err);

  return c.json({ error: "Internal server error" }, 500);
});
179
// Root endpoint: responds with the plain text "workflows".
app.get("/", (c) => {
  return c.text("workflows", 200);
});

/**
 * Ping endpoint: responds with `{ ping: "pong" }`.
 */
app.get("/ping", (c) => {
  return c.json({ ping: "pong" }, 200);
});

/**
 * Sub-routers, mounted in this order: cron under /cron, checker at the root,
 * incident under /incident.
 */
app.route("/cron", cronRouter);
app.route("/", checkerRoute);
app.route("/incident", incidentRoute);

// Print the registered routes only when running in development.
const isDevelopment = NODE_ENV === "development";
if (isDevelopment) {
  showRoutes(app, { colorize: true, verbose: true });
}
198
// Log the configured port and environment at module load.
const startupInfo = { port: PORT, environment: NODE_ENV };
logger.info("Starting server", startupInfo);

// Default export: the port to listen on plus the app's fetch handler.
// NOTE(review): presumably consumed by a runtime that serves a
// `{ port, fetch }` default export — confirm against the deployment setup.
const handleFetch = app.fetch;
const server = { port: PORT, fetch: handleFetch };

export default server;