Openstatus www.openstatus.dev

🥅 improve error (#404)

* 🥅 improve error

* 🥅 improve error

* 🥅 improve error

* 🥅 improve error

* 😭 should fix bun issue with request

* chore: use retry policy

* 😭 should fix bun issue with request keepalive

* 📝 more logs

* 📝 more logs

* 😭 lowering retry

* 💩 handling poop url

* 🚀

* 🧹

* 🧹

* 🧹

* 🧪

---------

Co-authored-by: mxkaske <maximilian@kaske.org>

authored by

Thibault Le Ouay
mxkaske
and committed by
GitHub
a21f46fc 39ca0472

+192 -112
+2 -1
apps/server/.env.example
··· 4 4 UNKEY_TOKEN= 5 5 TINY_BIRD_API_KEY= 6 6 UPSTASH_REDIS_REST_URL= 7 - UPSTASH_REDIS_REST_TOKEN= 7 + UPSTASH_REDIS_REST_TOKEN= 8 + RESEND_API_KEY=
+1 -1
apps/server/fly.toml
··· 19 19 processes = ["app"] 20 20 21 21 [deploy] 22 - strategy = "bluegreen" 22 + strategy = "rolling" 23 23 24 24 [[http_service.checks]] 25 25 grace_period = "10s"
+2 -2
apps/server/src/checker/alerting.ts
··· 3 3 import { db, eq, schema } from "@openstatus/db"; 4 4 import { selectNotificationSchema } from "@openstatus/db/src/schema"; 5 5 6 - import { monitor } from "./checker"; 6 + import { publishPingRetryPolicy } from "./checker"; 7 7 import type { Payload } from "./schema"; 8 8 import { providerToFunction } from "./utils"; 9 9 10 10 export async function catchTooManyRetry(payload: Payload) { 11 - await monitor({ monitorInfo: payload, latency: -1, statusCode: 500 }); 11 + await publishPingRetryPolicy({ payload, latency: -1, statusCode: 500 }); 12 12 if (payload?.status !== "error") { 13 13 await triggerAlerting({ monitorId: payload.monitorId }); 14 14 await updateMonitorStatus({
+7 -6
apps/server/src/checker/checker.test.ts
··· 8 8 } from "@openstatus/tinybird"; 9 9 10 10 import * as alerts from "./alerting"; 11 - import { checker } from "./checker"; 11 + import { checkerRetryPolicy } from "./checker"; 12 12 13 13 vi.mock("@openstatus/tinybird", async () => { 14 14 const actual = await vi.importActual("@openstatus/tinybird"); 15 15 return { 16 16 // @ts-ignore 17 17 ...actual, 18 - publishPingResponse: vi.fn(), 18 + publishPingResponse: vi.fn().mockResolvedValue({ successful_rows: 1 }), 19 19 }; 20 20 }); 21 21 22 22 it("should call updateMonitorStatus when we can fetch", async () => { 23 23 const spyOn = vi.spyOn(alerts, "updateMonitorStatus").mockReturnThis(); 24 - await checker({ 24 + await checkerRetryPolicy({ 25 25 workspaceId: "1", 26 26 monitorId: "1", 27 27 url: "https://www.google.com", ··· 36 36 it("should call updateMonitorStatus when status error", async () => { 37 37 const spyOn = vi.spyOn(alerts, "updateMonitorStatus").mockReturnThis(); 38 38 try { 39 - await checker({ 39 + await checkerRetryPolicy({ 40 40 workspaceId: "1", 41 41 monitorId: "1", 42 42 url: "https://xxxxxxx.fake", ··· 48 48 } catch (e) { 49 49 expect(e).toBeInstanceOf(Error); 50 50 } 51 - expect(spyOn).toHaveBeenCalledTimes(0); 51 + expect(spyOn).toHaveBeenCalledTimes(1); 52 52 }); 53 53 54 54 it("What should we do when redirect ", async () => { 55 55 const spyOn = vi.spyOn(alerts, "updateMonitorStatus").mockReturnThis(); 56 + 56 57 try { 57 - await checker({ 58 + await checkerRetryPolicy({ 58 59 workspaceId: "1", 59 60 monitorId: "1", 60 61 url: "https://www.openstatus.dev/toto",
+71 -70
apps/server/src/checker/checker.ts
··· 1 - import { nanoid } from "nanoid"; 2 - 3 - import { publishPingResponse } from "@openstatus/tinybird"; 4 - 5 - import { env } from "../env"; 6 1 import { updateMonitorStatus } from "./alerting"; 2 + import type { PublishPingType } from "./ping"; 3 + import { pingEndpoint, publishPing } from "./ping"; 7 4 import type { Payload } from "./schema"; 8 5 9 - const region = env.FLY_REGION; 10 - 11 - export const monitor = async ({ 12 - monitorInfo, 6 + // we could have a 'retry' parameter to know how often we should retry 7 + // we could use a setTimeout to retry after a certain amount of time - can be random between 500ms and 10s 8 + export const publishPingRetryPolicy = async ({ 9 + payload, 13 10 latency, 14 11 statusCode, 15 - }: { 16 - monitorInfo: Payload; 17 - latency: number; 18 - statusCode: number; 19 - }) => { 20 - const { monitorId, cronTimestamp, url, workspaceId } = monitorInfo; 21 - 12 + }: PublishPingType) => { 13 + try { 14 + console.log( 15 + `try publish ping to tb - attempt 1 ${JSON.stringify( 16 + payload, 17 + )} with latency ${latency} and status code ${statusCode}`, 18 + ); 19 + await publishPing({ payload, statusCode, latency }); 20 + } catch { 21 + try { 22 + console.log( 23 + "try publish ping to tb - attempt 2 ", 24 + JSON.stringify(payload), 25 + ); 26 + await publishPing({ payload, statusCode, latency }); 27 + } catch (e) { 28 + throw e; 29 + } 30 + } 22 31 console.log( 23 - `publishing ping response for ${url} with status ${statusCode} and latency ${latency} and monitorId ${monitorId} `, 32 + `Successfully published ${JSON.stringify( 33 + payload, 34 + )} with latency ${latency} and status code ${statusCode}`, 24 35 ); 25 - await publishPingResponse({ 26 - id: nanoid(), // TBD: we don't need it 27 - timestamp: Date.now(), 28 - statusCode, 29 - latency, 30 - region, 31 - url, 32 - monitorId, 33 - cronTimestamp, 34 - workspaceId, 35 - }); 36 36 }; 37 37 38 - export const checker = async (data: Payload) => { 39 - const startTime = 
Date.now(); 40 - const res = await ping(data); 41 - const endTime = Date.now(); 38 + const run = async (data: Payload, retry?: number | undefined) => { 39 + let startTime = 0; 40 + let endTime = 0; 41 + let res = null; 42 + // We are doing these for wrong urls 43 + try { 44 + startTime = Date.now(); 45 + res = await pingEndpoint(data); 46 + endTime = Date.now(); 47 + } catch (e) { 48 + console.log("error on pingEndpoint", e); 49 + endTime = Date.now(); 50 + } 51 + 42 52 const latency = endTime - startTime; 43 53 if (res?.ok) { 44 - await monitor({ monitorInfo: data, latency, statusCode: res.status }); 54 + await publishPingRetryPolicy({ 55 + payload: data, 56 + latency, 57 + statusCode: res.status, 58 + }); 45 59 if (data?.status === "error") { 46 60 await updateMonitorStatus({ 47 61 monitorId: data.monitorId, ··· 49 63 }); 50 64 } 51 65 } else { 52 - console.log(`first retry for ${data.url} with status ${res?.status}`); 53 - const startTime = Date.now(); 54 - const retry = await ping(data); 55 - const endTime = Date.now(); 56 - const latency = endTime - startTime; 57 - if (retry?.ok) { 58 - await monitor({ monitorInfo: data, latency, statusCode: retry.status }); 59 - if (data?.status === "error") { 66 + if (retry === 0) { 67 + throw new Error(`error on ping for ${data.monitorId}`); 68 + } 69 + // Store the error on third task retry 70 + if (retry === 1) { 71 + await publishPingRetryPolicy({ 72 + payload: data, 73 + latency, 74 + statusCode: res?.status || 0, 75 + }); 76 + if (data?.status === "active") { 60 77 await updateMonitorStatus({ 61 78 monitorId: data.monitorId, 62 - status: "active", 79 + status: "error", 63 80 }); 64 81 } 65 - } else { 66 - console.log( 67 - `error for ${JSON.stringify(data)} with info ${JSON.stringify(retry)}`, 68 - ); 69 82 } 70 83 } 84 + return { res, latency }; 71 85 }; 72 86 73 - export const ping = async ( 74 - data: Pick<Payload, "headers" | "body" | "method" | "url">, 75 - ) => { 76 - const headers = 77 - 
data?.headers?.reduce((o, v) => { 78 - if (v.key.trim() === "") return o; // removes empty keys from the header 79 - return { ...o, [v.key]: v.value }; 80 - }, {}) || {}; 81 - 87 + export const checkerRetryPolicy = async (data: Payload, retry = 0) => { 82 88 try { 83 - const res = await fetch(data?.url, { 84 - method: data?.method, 85 - cache: "no-store", 86 - headers: { 87 - "OpenStatus-Ping": "true", 88 - ...headers, 89 - }, 90 - // Avoid having "TypeError: Request with a GET or HEAD method cannot have a body." error 91 - ...(data.method === "POST" && { body: data?.body }), 92 - }); 93 - 94 - return res; 95 - } catch (e) { 96 - console.log(`fetch error for : ${data} with error ${e}`); 97 - console.log(e); 89 + console.log("try run checker - attempt 1 ", JSON.stringify(data)); 90 + await run(data, 0); 91 + } catch { 92 + try { 93 + console.log("try run checker - attempt 2 ", JSON.stringify(data)); 94 + await run(data, 1); 95 + } catch (e) { 96 + throw e; 97 + } 98 98 } 99 + console.log("successfully run checker ", JSON.stringify(data)); 99 100 };
+19 -12
apps/server/src/checker/index.ts
··· 1 1 import { Hono } from "hono"; 2 2 3 3 import { env } from "../env"; 4 - import { catchTooManyRetry } from "./alerting"; 5 - import { checker } from "./checker"; 4 + import { checkerRetryPolicy } from "./checker"; 6 5 import { payloadSchema } from "./schema"; 7 6 import type { Payload } from "./schema"; 8 7 ··· 30 29 console.error(result.error); 31 30 return c.text("Unprocessable Entity", 422); 32 31 } 33 - 34 - if (Number(c.req.header("X-CloudTasks-TaskRetryCount") || 0) > 5) { 32 + const retry = Number(c.req.header("X-CloudTasks-TaskRetryCount") || 0); 33 + if (retry > 3) { 35 34 console.error( 36 35 `catchTooManyRetry for ${JSON.stringify(result.data)} 37 36 )}`, 38 37 ); 39 38 // catchTooManyRetry(result.data); 40 - return c.text("Ok", 200); // needs to be 200, otherwise qstash will retry 39 + return c.text("Ok", 200); // finish the task 41 40 } 42 41 43 42 console.log(`Google Checker should try this: ${result.data.url}`); ··· 46 45 console.log( 47 46 `start checker URL: ${result.data.url} monitorId ${result.data.monitorId}`, 48 47 ); 49 - checker(result.data); 48 + await checkerRetryPolicy(result.data, retry); 50 49 console.log( 51 50 `end checker URL: ${result.data.url} monitorId ${result.data.monitorId}`, 52 51 ); 53 52 return c.text("Ok", 200); 54 53 } catch (e) { 55 - console.error(e); 54 + console.error( 55 + `fail checker URL: ${result.data.url} monitorId ${result.data.monitorId}`, 56 + JSON.stringify(result.data), 57 + e, 58 + ); 56 59 return c.text("Internal Server Error", 500); 57 60 } 58 61 }); ··· 73 76 console.error(result.error); 74 77 return c.text("Unprocessable Entity", 422); 75 78 } 76 - 77 - if (Number(c.req.header("X-CloudTasks-TaskRetryCount") || 0) > 5) { 79 + const retry = Number(c.req.header("X-CloudTasks-TaskRetryCount") || 0); 80 + if (retry > 1) { 78 81 console.error( 79 82 `catchTooManyRetry for ${JSON.stringify(result.data)} 80 83 )}`, 81 84 ); 82 85 // catchTooManyRetry(result.data); 83 - return c.text("Ok", 200); // needs to 
be 200, otherwise qstash will retry 86 + return c.text("Ok", 200); // finish the task 84 87 } 85 88 86 89 console.log(`Google Checker should try this: ${result.data.url}`); ··· 89 92 console.log( 90 93 `start checker URL: ${result.data.url} monitorId ${result.data.monitorId}`, 91 94 ); 92 - checker(result.data); 95 + await checkerRetryPolicy(result.data, retry); 93 96 console.log( 94 97 `end checker URL: ${result.data.url} monitorId ${result.data.monitorId}`, 95 98 ); 96 99 return c.text("Ok", 200); 97 100 } catch (e) { 98 - console.error(e); 101 + console.error( 102 + `fail checker URL: ${result.data.url} monitorId ${result.data.monitorId}`, 103 + JSON.stringify(result.data), 104 + e, 105 + ); 99 106 return c.text("Internal Server Error", 500); 100 107 } 101 108 });
+77
apps/server/src/checker/ping.ts
··· 1 + import { nanoid } from "nanoid"; 2 + 3 + import { publishPingResponse } from "@openstatus/tinybird"; 4 + 5 + import { env } from "../env"; 6 + import { fakePromiseWithRandomResolve } from "../utils/random-promise"; 7 + import type { Payload } from "./schema"; 8 + 9 + const region = env.FLY_REGION; 10 + 11 + function getHeaders(data?: Payload) { 12 + const customHeaders = 13 + data?.headers?.reduce((o, v) => { 14 + // removes empty keys from the header 15 + if (v.key.trim() === "") return o; 16 + return { ...o, [v.key]: v.value }; 17 + }, {}) || {}; 18 + return { 19 + "OpenStatus-Ping": "true", 20 + ...customHeaders, 21 + }; 22 + } 23 + 24 + export async function pingEndpoint(data: Payload) { 25 + try { 26 + const res = await fetch(data?.url, { 27 + method: data?.method, 28 + keepalive: false, 29 + cache: "no-store", 30 + headers: getHeaders(data), 31 + // Avoid having "TypeError: Request with a GET or HEAD method cannot have a body." error 32 + ...(data.method === "POST" && { body: data?.body }), 33 + }); 34 + 35 + return res; 36 + } catch (e) { 37 + throw e; 38 + } 39 + } 40 + 41 + export type PublishPingType = { 42 + payload: Payload; 43 + statusCode: number; 44 + latency: number; 45 + }; 46 + 47 + export async function publishPing({ 48 + payload, 49 + statusCode, 50 + latency, 51 + }: PublishPingType) { 52 + const { monitorId, cronTimestamp, url, workspaceId } = payload; 53 + 54 + if ( 55 + process.env.NODE_ENV === "production" || 56 + process.env.NODE_ENV === "test" 57 + ) { 58 + const res = await publishPingResponse({ 59 + id: nanoid(), // TBD: we don't need it 60 + timestamp: Date.now(), 61 + statusCode, 62 + latency, 63 + region, 64 + url, 65 + monitorId, 66 + cronTimestamp, 67 + workspaceId, 68 + }); 69 + if (res.successful_rows === 0) { 70 + throw new Error(`error 0 rows on publish ping for ${payload.monitorId}`); 71 + } 72 + return res; 73 + } 74 + 75 + const res = await fakePromiseWithRandomResolve(); 76 + return res; 77 + }
+13
apps/server/src/utils/random-promise.ts
··· 1 + export function fakePromiseWithRandomResolve() { 2 + return new Promise((resolve, reject) => { 3 + const randomTime = Math.floor(Math.random() * 1000); 4 + setTimeout(() => { 5 + const shouldResolve = Math.random() < 0; // 0.5 6 + if (shouldResolve) { 7 + resolve("Promise resolved successfully."); 8 + } else { 9 + reject(new Error("Promise rejected.")); 10 + } 11 + }, randomTime); 12 + }); 13 + }
-20
apps/web/src/app/api/checker/cron/_cron.ts
··· 88 88 const [response] = await client.createTask(request); 89 89 90 90 allResult.push(response); 91 - 92 - /** 93 - * Pushing to our Golang endpoint 94 - */ 95 - const tempTask: google.cloud.tasks.v2beta3.ITask = { 96 - httpRequest: { 97 - headers: { 98 - "Content-Type": "application/json", // Set content type to ensure compatibility your application's request parsing 99 - ...(region !== "auto" && { "fly-prefer-region": region }), // Specify the region you want the request to be sent to 100 - Authorization: `Basic ${env.CRON_SECRET}`, 101 - }, 102 - httpMethod: "POST", 103 - url: "https://checker.openstatus.dev/", 104 - body: Buffer.from(JSON.stringify(payload)).toString("base64"), 105 - }, 106 - }; 107 - const tempRequest = { parent: parent, task: tempTask }; 108 - const [tempResponse] = await client.createTask(tempRequest); 109 - 110 - allResult.push(tempResponse); 111 91 } 112 92 } 113 93 await Promise.all(allResult);