apps/workflows/src/index.ts at main · openstatus.dev/openstatus

openstatus.dev / openstatus
fork atom
Openstatus www.openstatus.dev
fork atom
openstatus / apps / workflows / src / index.ts
at main 203 lines 5.5 kB view raw
wrap content
Thibault Le Ouay fix: bug incident (#1772) 2mo ago
6d27a3d3
  1import { AsyncLocalStorage } from "node:async_hooks";
  2// import * as Sentry from "@sentry/node";
  3import { sentry } from "@hono/sentry";
  4import {
  5  configure,
  6  getConsoleSink,
  7  getLogger,
  8  jsonLinesFormatter,
  9  withContext,
 10} from "@logtape/logtape";
 11import { getOpenTelemetrySink } from "@logtape/otel";
 12
 13// import { getSentrySink } from "@logtape/sentry";
 14import { Hono } from "hono";
 15import { showRoutes } from "hono/dev";
 16import { requestId } from "hono/request-id";
 17// import { logger } from "hono/logger";
 18import { checkerRoute } from "./checker";
 19import { cronRouter } from "./cron";
 20import { env } from "./env";
 21
 22import { resourceFromAttributes } from "@opentelemetry/resources";
 23import { ATTR_DEPLOYMENT_ENVIRONMENT_NAME } from "@opentelemetry/semantic-conventions/incubating";
 24import { incidentRoute } from "./incident";
 25
 26const { NODE_ENV, PORT } = env();
 27
 28export type Env = {
 29  Variables: {
 30    event: Record<string, unknown>;
 31  };
 32};
 33
 34/**
 35 * Tail sampling strategy based on loggingsucks.com best practices
 36 * Makes sampling decisions post-request completion to capture:
 37 * - All errors (5xx status codes, explicit errors)
 38 * - Slow requests (above p99 threshold)
 39 * - Client errors (4xx) at higher rate than successful requests
 40 * - Random sample of remaining successful, fast requests
 41 */
 42function shouldSample(event: Record<string, unknown>): boolean {
 43  const statusCode = event.status_code as number | undefined;
 44  const durationMs = event.duration_ms as number | undefined;
 45
 46  // Always capture: server errors
 47  if (statusCode && statusCode >= 500) return true;
 48
 49  // Always capture: explicit errors
 50  if (event.error) return true;
 51
 52  // Always capture: slow requests (above p99 - 2s threshold)
 53  if (durationMs && durationMs > 2000) return true;
 54
 55  // Higher sampling for client errors (4xx) - 50%
 56  if (statusCode && statusCode >= 400 && statusCode < 500) {
 57    return true;
 58  }
 59
 60  // Random sample successful, fast requests at 20%
 61  return Math.random() < 0.2;
 62}
 63
 64const defaultLogger = getOpenTelemetrySink({
 65  serviceName: "openstatus-workflows",
 66  otlpExporterConfig: {
 67    url: "https://eu-central-1.aws.edge.axiom.co/v1/logs",
 68    headers: {
 69      Authorization: `Bearer ${env().AXIOM_TOKEN}`,
 70      "X-Axiom-Dataset": env().AXIOM_DATASET,
 71    },
 72  },
 73  additionalResource: resourceFromAttributes({
 74    [ATTR_DEPLOYMENT_ENVIRONMENT_NAME]: env().NODE_ENV,
 75  }),
 76});
 77
 78await configure({
 79  sinks: {
 80    console: getConsoleSink({ formatter: jsonLinesFormatter }),
 81    // sentry: getSentrySink(),
 82    otel: defaultLogger,
 83  },
 84  loggers: [
 85    {
 86      category: "workflow",
 87      lowestLevel: "debug",
 88      sinks: ["console"],
 89    },
 90    {
 91      category: "workflow-otel",
 92      lowestLevel: "info",
 93      sinks: ["otel"],
 94    },
 95  ],
 96  contextLocalStorage: new AsyncLocalStorage(),
 97});
 98
 99const logger = getLogger(["workflow"]);
100const otelLogger = getLogger(["workflow-otel"]);
101
// Application instance; `strict: false` is passed through to the Hono router options.
const app = new Hono<Env>({ strict: false });

// Global middleware, registered in order: request id first, then Sentry.
app.use("*", requestId());

const sentryDsn = env().SENTRY_DSN;
app.use("*", sentry({ dsn: sentryDsn }));
107
/**
 * Wide-event logging middleware.
 *
 * Creates one `event` object per request, stores it on the context under
 * "event", fills it with request fields before the downstream handlers run
 * and with outcome fields afterwards, then emits it through the OTel logger
 * when `shouldSample` accepts it. A short debug line is always written to the
 * console logger.
 */
app.use("*", async (c, next) => {
  const reqId = c.get("requestId");
  const startedAt = Date.now();

  const event: Record<string, unknown> = {
    timestamp: new Date().toISOString(),
  };
  c.set("event", event);

  // Fields attached to the LogTape context for the duration of the request.
  const logContext = {
    request_id: reqId,
    method: c.req.method,
    url: c.req.url,
    user_agent: c.req.header("User-Agent"),
    // ipAddress: c.req.header("CF-Connecting-IP") || c.req.header("X-Forwarded-For")
  };

  await withContext(logContext, async () => {
    // Build wide event context at request start
    Object.assign(event, {
      request_id: reqId,
      method: c.req.method,
      path: c.req.path,
      url: c.req.url,
      user_agent: c.req.header("User-Agent"),
      content_type: c.req.header("Content-Type"),
      cf_ray: c.req.header("CF-Ray"),
      cf_connecting_ip: c.req.header("CF-Connecting-IP"),
    });

    await next();

    const elapsedMs = Date.now() - startedAt;
    const failure = c.error;

    event.status_code = c.res.status;
    event.outcome = failure ? "error" : "success";
    if (failure) {
      event.error = {
        type: failure.name,
        message: failure.message,
        stack: failure.stack,
      };
    }
    event.duration_ms = elapsedMs;

    // Emit canonical log line with all context (wide event pattern)
    if (shouldSample(event)) {
      otelLogger.info("request", event);
    }

    logger.debug("Request completed", {
      status_code: c.res.status,
      duration_ms: elapsedMs,
      request_id: reqId,
    });
  });
});
164
/**
 * Global error handler: logs the error with request details, reports it to
 * Sentry, and answers with a generic 500 JSON body.
 */
app.onError((err, c) => {
  const { method, path, url } = c.req;
  const errorDetails = {
    error_name: err.name,
    error_message: err.message,
    error_stack: err.stack,
    method,
    path,
    url,
    request_id: c.get("requestId"),
  };

  logger.error("Unhandled request error", errorDetails);
  c.get("sentry").captureException(err);

  return c.json({ error: "Internal server error" }, 500);
});
179
// Route table, registered in this order:
//   GET /        -> plain-text "workflows"
//   GET /ping    -> ping/pong JSON
//   /cron        -> cron routes
//   /            -> checker routes
//   /incident    -> incident routes
app
  .get("/", (c) => c.text("workflows", 200))
  .get("/ping", (c) => c.json({ ping: "pong" }, 200))
  .route("/cron", cronRouter)
  .route("/", checkerRoute)
  .route("/incident", incidentRoute);

// Print the registered routes in development only.
const isDevelopment = NODE_ENV === "development";
if (isDevelopment) {
  showRoutes(app, { verbose: true, colorize: true });
}
198
// Announce startup with the configured port and environment.
const startupDetails = { port: PORT, environment: NODE_ENV };
logger.info("Starting server", startupDetails);

// Default export: the port to listen on and the app's fetch handler.
export default { port: PORT, fetch: app.fetch };