Openstatus www.openstatus.dev
at main 203 lines 5.5 kB view raw
import { AsyncLocalStorage } from "node:async_hooks";
// import * as Sentry from "@sentry/node";
import { sentry } from "@hono/sentry";
import {
  configure,
  getConsoleSink,
  getLogger,
  jsonLinesFormatter,
  withContext,
} from "@logtape/logtape";
import { getOpenTelemetrySink } from "@logtape/otel";

// import { getSentrySink } from "@logtape/sentry";
import { Hono } from "hono";
import { showRoutes } from "hono/dev";
import { requestId } from "hono/request-id";
// import { logger } from "hono/logger";
import { checkerRoute } from "./checker";
import { cronRouter } from "./cron";
import { env } from "./env";

import { resourceFromAttributes } from "@opentelemetry/resources";
import { ATTR_DEPLOYMENT_ENVIRONMENT_NAME } from "@opentelemetry/semantic-conventions/incubating";
import { incidentRoute } from "./incident";

const { NODE_ENV, PORT } = env();

/**
 * Hono environment for this app: every request carries a mutable "wide event"
 * record under the `event` variable, which the request middleware fills in and
 * emits once the request has completed.
 */
export type Env = {
  Variables: {
    event: Record<string, unknown>;
  };
};

/** Requests slower than this (in milliseconds) are always captured. */
const SLOW_REQUEST_THRESHOLD_MS = 2000;

/** Fraction of client-error (4xx) requests that are captured. */
const CLIENT_ERROR_SAMPLE_RATE = 0.5;

/** Fraction of successful, fast requests that are captured. */
const SUCCESS_SAMPLE_RATE = 0.2;

/**
 * Tail sampling strategy based on loggingsucks.com best practices.
 * Makes sampling decisions post-request completion to capture:
 * - All errors (5xx status codes, explicit errors)
 * - Slow requests (above p99 threshold)
 * - Client errors (4xx) at higher rate than successful requests
 * - Random sample of remaining successful, fast requests
 *
 * @param event - Wide event for a completed request. Reads `status_code`,
 *   `duration_ms` and `error`; non-numeric `status_code` / `duration_ms`
 *   values are treated as absent.
 * @returns `true` when the event should be emitted to the sampled sink.
 */
function shouldSample(event: Record<string, unknown>): boolean {
  const statusCode =
    typeof event.status_code === "number" ? event.status_code : undefined;
  const durationMs =
    typeof event.duration_ms === "number" ? event.duration_ms : undefined;

  // Always capture: server errors
  if (statusCode !== undefined && statusCode >= 500) return true;

  // Always capture: explicit errors
  if (event.error) return true;

  // Always capture: slow requests (above p99 - 2s threshold)
  if (durationMs !== undefined && durationMs > SLOW_REQUEST_THRESHOLD_MS) {
    return true;
  }

  // Higher sampling for client errors (4xx) - 50%
  if (statusCode !== undefined && statusCode >= 400 && statusCode < 500) {
    return Math.random() < CLIENT_ERROR_SAMPLE_RATE;
  }

  // Random sample successful, fast requests at 20%
  return Math.random() < SUCCESS_SAMPLE_RATE;
}

// OpenTelemetry log sink that exports to the Axiom OTLP endpoint, tagged with
// the current deployment environment.
const defaultLogger = getOpenTelemetrySink({
  serviceName: "openstatus-workflows",
  otlpExporterConfig: {
    url: "https://eu-central-1.aws.edge.axiom.co/v1/logs",
    headers: {
      Authorization: `Bearer ${env().AXIOM_TOKEN}`,
      "X-Axiom-Dataset": env().AXIOM_DATASET,
    },
  },
  additionalResource: resourceFromAttributes({
    [ATTR_DEPLOYMENT_ENVIRONMENT_NAME]: env().NODE_ENV,
  }),
});

// Two logger categories: "workflow" writes JSON lines to the console (debug and
// up); "workflow-otel" writes to the OpenTelemetry sink (info and up).
await configure({
  sinks: {
    console: getConsoleSink({ formatter: jsonLinesFormatter }),
    // sentry: getSentrySink(),
    otel: defaultLogger,
  },
  loggers: [
    {
      category: "workflow",
      lowestLevel: "debug",
      sinks: ["console"],
    },
    {
      category: "workflow-otel",
      lowestLevel: "info",
      sinks: ["otel"],
    },
  ],
  contextLocalStorage: new AsyncLocalStorage(),
});

const logger = getLogger(["workflow"]);
const otelLogger = getLogger(["workflow-otel"]);

const app = new Hono<Env>({ strict: false });

app.use("*", requestId());

app.use("*", sentry({ dsn: env().SENTRY_DSN }));

// Request middleware: builds one wide event per request, runs the downstream
// handlers inside a logging context, then emits the event subject to tail
// sampling.
app.use("*", async (c, next) => {
  // Named `reqId` so the imported `requestId` middleware factory is not shadowed.
  const reqId = c.get("requestId");
  const startTime = Date.now();

  const event: Record<string, unknown> = {
    timestamp: new Date().toISOString(),
  };
  c.set("event", event);

  await withContext(
    {
      request_id: reqId,
      method: c.req.method,
      url: c.req.url,
      user_agent: c.req.header("User-Agent"),
      // ipAddress: c.req.header("CF-Connecting-IP") || c.req.header("X-Forwarded-For")
    },
    async () => {
      // Build wide event context at request start
      event.request_id = reqId;
      event.method = c.req.method;
      event.path = c.req.path;
      event.url = c.req.url;
      event.user_agent = c.req.header("User-Agent");
      event.content_type = c.req.header("Content-Type");
      event.cf_ray = c.req.header("CF-Ray");
      event.cf_connecting_ip = c.req.header("CF-Connecting-IP");

      await next();

      const duration = Date.now() - startTime;

      event.status_code = c.res.status;
      // NOTE(review): `c.error` is presumably populated by Hono when a
      // downstream handler failed — confirm against the Hono Context docs.
      if (c.error) {
        event.outcome = "error";
        event.error = {
          type: c.error.name,
          message: c.error.message,
          stack: c.error.stack,
        };
      } else {
        event.outcome = "success";
      }
      event.duration_ms = duration;

      // Emit canonical log line with all context (wide event pattern)
      if (shouldSample(event)) {
        otelLogger.info("request", event);
      }
      logger.debug("Request completed", {
        status_code: c.res.status,
        duration_ms: duration,
        request_id: reqId,
      });
    },
  );
});

// Global error handler: logs the failure, reports it to Sentry, and replies
// with a generic 500 so internal details are not returned to the client.
app.onError((err, c) => {
  logger.error("Unhandled request error", {
    error_name: err.name,
    error_message: err.message,
    error_stack: err.stack,
    method: c.req.method,
    path: c.req.path,
    url: c.req.url,
    request_id: c.get("requestId"),
  });
  c.get("sentry").captureException(err);

  return c.json({ error: "Internal server error" }, 500);
});

app.get("/", (c) => c.text("workflows", 200));

/**
 * Ping Pong
 */
app.get("/ping", (c) => c.json({ ping: "pong" }, 200));

/**
 * Cron Routes
 */
app.route("/cron", cronRouter);

app.route("/", checkerRoute);

app.route("/incident", incidentRoute);

// Print the registered route table only in development.
if (NODE_ENV === "development") {
  showRoutes(app, { verbose: true, colorize: true });
}

logger.info("Starting server", { port: PORT, environment: NODE_ENV });

const server = { port: PORT, fetch: app.fetch };

export default server;