A tool for parsing traffic on the jetstream and applying a moderation workstream based on regexp based rules

Init

+12
.env.example
··· 1 + MOD_DID=did:plc:xxx 2 + OZONE_URL=ozone.example.xyz 3 + OZONE_PDS=mushroom.us-region.host.bsky.network 4 + BSKY_HANDLE=mod.example.xyz 5 + BSKY_PASSWORD=xxxx 6 + HOST=127.0.0.1 7 + PORT=4000 // Not currently used 8 + METRICS_PORT=4001 9 + FIREHOSE_URL=wss://jetstream1.us-east.bsky.network/subscribe 10 + CURSOR_UPDATE_INTERVAL=10000 11 + LABEL_LIMIT=2900 * 1000 12 + LABEL_LIMIT_WAIT=300 * 1000
+7
.gitignore
··· 1 + .env 2 + node_modules 3 + cursor.txt 4 + *.log 5 + labels.db* 6 + .DS_Store 7 + src/constants.ts
+20
Dockerfile
··· 1 + # Description: Dockerfile for the Skywatch Tools 2 + FROM node:lts 3 + RUN curl -fsSL https://bun.sh/install | bash 4 + ENV PATH="/root/.bun/bin:${PATH}" 5 + 6 + # Create app directory 7 + WORKDIR /app 8 + COPY package*.json bun.lockb ./ 9 + 10 + # Install app dependencies 11 + RUN bun i 12 + 13 + # Bundle app source 14 + COPY . . 15 + 16 + # Expose the port the app runs 17 + EXPOSE 4101 18 + 19 + # Serve the app 20 + CMD ["bun", "run", "start"]
+17
LICENSE
··· 1 + Permission is hereby granted, free of charge, to any person obtaining a copy 2 + of this software and associated documentation files (the "Software"), to deal 3 + in the Software without restriction, including without limitation the rights 4 + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 5 + copies of the Software, and to permit persons to whom the Software is 6 + furnished to do so, subject to the following conditions: 7 + 8 + The above copyright notice and this permission notice shall be included in all 9 + copies or substantial portions of the Software. 10 + 11 + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 13 + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 14 + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 15 + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 16 + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 17 + SOFTWARE.
+42
README.md
··· 1 + # skywatch-tools 2 + 3 + This is a rewrite of the original skywatch-tools project in TypeScript. The original project was written in Bash. The purpose of this project is to automate the moderation by the Bluesky independent labeler skywatch.blue 4 + 5 + ## Installation and Setup 6 + 7 + To install dependencies: 8 + 9 + ```bash 10 + bun i 11 + ``` 12 + 13 + Modify .env.example with your own values and rename it to .env 14 + 15 + ```bash 16 + bun run start 17 + ``` 18 + 19 + To run in docker: 20 + 21 + ```bash 22 + docker build -pull -t skywatch-automod . 23 + docker run -d -p 4101:4101 skywatch-automod 24 + ``` 25 + 26 + ## Brief overview 27 + 28 + Currently this tooling does one thing. It monitors the bluesky firehose and analyzes content for phrases which fit Skywatch's criteria for moderation. If the criteria is met, it can automatically label the content with the appropriate label. 29 + 30 + In certain cases, where regexp will create too many false positives, it will flag content as a report against related to the account, so that it can be reviewed later. 31 + 32 + For information on how to set-up your own checks, please see the [developing_checks.md](./src/developing_checks.md) file. 33 + 34 + _TODO_: 35 + 36 + - [ ] Remove unused types 37 + - [ ] Update the types needed to be more specific to the checks rather than bluesky content types 38 + - [ ] Consider how to write directly to OzonePDS database rather than using the API. May require running the same instance as Ozone to allow for direct database access. 39 + - [ ] Add compose.yaml for easy deployment 40 + - [ ] Make the metrics server work (or remove it) 41 + 42 + Create a seperate program to watch OZONE_PDS firehose labels, and update the lists as needed. This will remove dependency on broken ruby tools created by aegis.
bun.lockb

This is a binary file and will not be displayed.

+50
package.json
··· 1 + { 2 + "name": "skywatch-automod", 3 + "version": "1.0.0", 4 + "type": "module", 5 + "scripts": { 6 + "start": "npx tsx src/main.ts", 7 + "dev": "npx tsx --watch src/main.ts", 8 + "format": "bunx prettier --write .", 9 + "lint": "bunx eslint .", 10 + "lint:fix": "bunx eslint --fix .", 11 + "prepare": "bunx husky install" 12 + }, 13 + "lint-staged": { 14 + "*": "prettier --ignore-unknown --write" 15 + }, 16 + "devDependencies": { 17 + "@eslint/js": "^9.15.0", 18 + "@trivago/prettier-plugin-sort-imports": "^4.3.0", 19 + "@types/better-sqlite3": "^7.6.12", 20 + "@types/eslint__js": "^8.42.3", 21 + "@types/express": "^4.17.21", 22 + "@types/node": "^22.9.1", 23 + "eslint": "^9.15.0", 24 + "prettier": "^3.3.3", 25 + "tsx": "^4.19.2", 26 + "typescript": "^5.6.3", 27 + "typescript-eslint": "^8.15.0" 28 + }, 29 + "dependencies": { 30 + "@atproto/api": "^0.13.23", 31 + "@atproto/bsky": "^0.0.101", 32 + "@atproto/lexicon": "^0.4.4", 33 + "@atproto/ozone": "^0.1.62", 34 + "@atproto/repo": "^0.6.0", 35 + "@atproto/xrpc-server": "^0.7.4", 36 + "@skyware/bot": "^0.3.7", 37 + "@skyware/jetstream": "^0.2.0", 38 + "@skyware/labeler": "^0.1.13", 39 + "bottleneck": "^2.19.5", 40 + "dotenv": "^16.4.5", 41 + "express": "^4.21.1", 42 + "husky": "^9.1.7", 43 + "lint-staged": "^15.2.10", 44 + "p-ratelimit": "^1.0.1", 45 + "pino": "^9.5.0", 46 + "pino-pretty": "^13.0.0", 47 + "prom-client": "^15.1.3", 48 + "undici": "^7.2.0" 49 + } 50 + }
+17
src/agent.ts
··· 1 + import { setGlobalDispatcher, Agent as Agent } from "undici"; 2 + setGlobalDispatcher(new Agent({ connect: { timeout: 20_000 } })); 3 + import { BSKY_HANDLE, BSKY_PASSWORD, OZONE_PDS } from "./config.js"; 4 + import { AtpAgent } from "@atproto/api"; 5 + 6 + export const agent = new AtpAgent({ 7 + service: `https://${OZONE_PDS}`, 8 + }); 9 + export const login = () => 10 + agent.login({ 11 + identifier: BSKY_HANDLE, 12 + password: BSKY_PASSWORD, 13 + }); 14 + 15 + export const isLoggedIn = login() 16 + .then(() => true) 17 + .catch(() => false);
+56
src/checkHandles.ts
··· 1 + import { HANDLE_CHECKS } from "./constants.js"; 2 + import logger from "./logger.js"; 3 + import { Handle } from "./types.js"; 4 + import { 5 + createAccountReport, 6 + createAccountLabel, 7 + createAccountComment, 8 + } from "./moderation.js"; 9 + 10 + export const checkHandle = async (handle: Handle[]) => { 11 + // Get a list of labels 12 + const labels: string[] = Array.from( 13 + HANDLE_CHECKS, 14 + (handleCheck) => handleCheck.label, 15 + ); 16 + 17 + // iterate through the labels 18 + labels.forEach((label) => { 19 + const checkList = HANDLE_CHECKS.find( 20 + (handleCheck) => handleCheck.label === label, 21 + ); 22 + 23 + if (checkList?.ignoredDIDs) { 24 + if (checkList.ignoredDIDs.includes(handle[0].did)) { 25 + return logger.info(`Whitelisted DID: ${handle[0].did}`); 26 + } 27 + } else { 28 + if (checkList!.check.test(handle[0].handle)) { 29 + if (checkList?.whitelist) { 30 + // False-positive checks 31 + if (checkList?.whitelist.test(handle[0].handle)) { 32 + logger.info(`Whitelisted phrase found for: ${handle[0].handle}`); 33 + return; 34 + } 35 + } else { 36 + logger.info(`${checkList!.label} in handle: ${handle[0].handle}`); 37 + } 38 + 39 + if (checkList?.reportOnly === true) { 40 + logger.info(`Report only: ${handle[0].handle}`); 41 + createAccountReport( 42 + handle[0].did, 43 + `${handle[0].time}: ${checkList!.comment} - ${handle[0].handle}`, 44 + ); 45 + return; 46 + } else { 47 + createAccountLabel( 48 + handle[0].did, 49 + `${checkList!.label}`, 50 + `${handle[0].time}: ${checkList!.comment} - ${handle[0].handle}`, 51 + ); 52 + } 53 + } 54 + } 55 + }); 56 + };
+80
src/checkPosts.ts
··· 1 + import { POST_CHECKS } from "./constants.js"; 2 + import { Post } from "./types.js"; 3 + import logger from "./logger.js"; 4 + import { 5 + createPostLabel, 6 + createAccountReport, 7 + createAccountComment, 8 + } from "./moderation.js"; 9 + 10 + export const checkPosts = async (post: Post[]) => { 11 + // Get a list of labels 12 + const labels: string[] = Array.from( 13 + POST_CHECKS, 14 + (postCheck) => postCheck.label, 15 + ); 16 + 17 + // Destructure Post object 18 + const { did, time, atURI, text, cid } = post[0]; 19 + 20 + // iterate through the labels 21 + labels.forEach((label) => { 22 + const checkPost = POST_CHECKS.find( 23 + (postCheck) => postCheck.label === label, 24 + ); 25 + 26 + if (checkPost?.ignoredDIDs) { 27 + if (checkPost.ignoredDIDs.includes(did)) { 28 + return logger.info(`Whitelisted DID: ${did}`); 29 + } 30 + } else { 31 + if (checkPost!.check.test(text)) { 32 + if (checkPost?.whitelist) { 33 + if (checkPost?.whitelist.test(text)) { 34 + logger.info(`Whitelisted phrase found"`); 35 + return; 36 + } 37 + } else { 38 + logger.info(`${checkPost!.label} in post at ${atURI}`); 39 + 40 + if (checkPost!.reportOnly === true) { 41 + logger.info(`Report only: ${did}`); 42 + createAccountReport( 43 + did, 44 + `${time}: ${checkPost?.comment} at ${atURI} with text "${text}"`, 45 + ); 46 + return; 47 + } else { 48 + logger.info(`Labeling post: ${atURI}`); 49 + 50 + createPostLabel( 51 + post[0].atURI, 52 + post[0].cid, 53 + `${checkPost!.label}`, 54 + `${post[0].time}: ${checkPost!.comment} at ${post[0].atURI} with text "${post[0].text}"`, 55 + ); 56 + 57 + if (checkPost!.commentOnly === true) { 58 + logger.info(`Comment only: ${post[0].did}`); 59 + createAccountComment( 60 + post[0].did, 61 + `${post[0].time}: ${checkPost?.comment} at ${post[0].atURI} with text "${post[0].text}"`, 62 + ); 63 + return; 64 + } else if (checkPost?.label === "fundraising-link") { 65 + return; // skip fundraising links—hardcoded because of the insane volume by spammers. 66 + } else if (checkPost!.commentOnly === false) { 67 + logger.info( 68 + `Creating report for post ${post[0].atURI} on ${post[0].did}`, 69 + ); 70 + createAccountReport( 71 + post[0].did, 72 + ` ${post[0].time}: ${checkPost!.comment} at ${post[0].atURI} with text "${post[0].text}"`, 73 + ); 74 + } 75 + } 76 + } 77 + } 78 + } 79 + }); 80 + };
+82
src/checkProfiles.ts
··· 1 + import { PROFILE_CHECKS } from "./constants.js"; 2 + import logger from "./logger.js"; 3 + import { 4 + createAccountReport, 5 + createAccountLabel, 6 + createAccountComment, 7 + } from "./moderation.js"; 8 + 9 + export const checkProfile = async ( 10 + did: string, 11 + time: number, 12 + displayName: string, 13 + description: string, 14 + ) => { 15 + // Get a list of labels 16 + const labels: string[] = Array.from( 17 + PROFILE_CHECKS, 18 + (profileCheck) => profileCheck.label, 19 + ); 20 + 21 + // iterate through the labels 22 + labels.forEach((label) => { 23 + const checkProfiles = PROFILE_CHECKS.find( 24 + (profileCheck) => profileCheck.label === label, 25 + ); 26 + 27 + // Check if DID is whitelisted 28 + if (checkProfiles?.ignoredDIDs) { 29 + if (checkProfiles.ignoredDIDs.includes(did)) { 30 + return logger.info(`Whitelisted DID: ${did}`); 31 + } 32 + } else { 33 + let checkCount: number = 0; // Counter for checking if any checks are found 34 + 35 + // Check if description is enabled 36 + if (checkProfiles?.description === true) { 37 + if (checkProfiles!.check.test(description)) { 38 + if (checkProfiles?.whitelist) { 39 + if (checkProfiles?.whitelist.test(description)) { 40 + logger.info(`Whitelisted phrase found.`); 41 + } 42 + } else { 43 + logger.info(`${checkProfiles!.label} in description.`); 44 + checkCount++; 45 + } 46 + } 47 + } 48 + 49 + if (checkProfiles?.displayName === true) { 50 + if (checkProfiles!.check.test(displayName)) { 51 + if (checkProfiles?.whitelist) { 52 + if (checkProfiles?.whitelist.test(displayName)) { 53 + logger.info(`Whitelisted phrase found for: ${displayName}`); 54 + } 55 + } else { 56 + logger.info( 57 + `${checkProfiles!.label} in display name: ${displayName}`, 58 + ); 59 + checkCount++; 60 + } 61 + } 62 + } 63 + 64 + if (checkCount === 0) return; 65 + 66 + if (checkProfiles.reportOnly === true) { 67 + logger.info(`Report only: ${did}`); 68 + createAccountReport( 69 + did, 70 + `${time}: ${checkProfiles!.comment} - ${displayName} - ${description}`, 71 + ); 72 + return; 73 + } else { 74 + createAccountLabel( 75 + did, 76 + `${checkProfiles!.label}`, 77 + `${time}: ${checkProfiles!.comment}`, 78 + ); 79 + } 80 + } 81 + }); 82 + };
+24
src/config.ts
··· 1 + import "dotenv/config"; 2 + 3 + export const MOD_DID = process.env.DID ?? ""; 4 + export const OZONE_URL = process.env.OZONE_URL ?? ""; 5 + export const OZONE_PDS = process.env.OZONE_PDS ?? ""; 6 + export const BSKY_HANDLE = process.env.BSKY_HANDLE ?? ""; 7 + export const BSKY_PASSWORD = process.env.BSKY_PASSWORD ?? ""; 8 + export const HOST = process.env.HOST ?? "127.0.0.1"; 9 + export const PORT = process.env.PORT ? Number(process.env.PORT) : 4100; 10 + export const METRICS_PORT = process.env.METRICS_PORT 11 + ? Number(process.env.METRICS_PORT) 12 + : 4101; // Left this intact from the code I adapted this from 13 + export const FIREHOSE_URL = 14 + process.env.FIREHOSE_URL ?? "wss://jetstream.atproto.tools/subscribe"; 15 + export const WANTED_COLLECTION = [ 16 + "app.bsky.feed.post", 17 + "app.bsky.actor.defs", 18 + "app.bsky.actor.profile", 19 + ]; 20 + export const CURSOR_UPDATE_INTERVAL = process.env.CURSOR_UPDATE_INTERVAL 21 + ? Number(process.env.CURSOR_UPDATE_INTERVAL) 22 + : 60000; 23 + export const LABEL_LIMIT = process.env.LABEL_LIMIT; 24 + export const LABEL_LIMIT_WAIT = process.env.LABEL_LIMIT_WAIT;
+60
src/constants.ts.example
··· 1 + import { Checks } from "./types.js"; 2 + 3 + // rename this to constants.ts 4 + 5 + export const PROFILE_CHECKS: Checks[] = [ 6 + { 7 + label: "skub", 8 + comment: "Pro-skub language found in profile", 9 + description: true, 10 + displayName: true, 11 + reportOnly: false, 12 + commentOnly: false, 13 + check: new RegExp( 14 + "(only|pro)[ -]skub|we love skub|skub is (good|god|king)|\\bskub\\b", 15 + "i", 16 + ), 17 + whitelist: new RegExp("(anti|[🚫]|DNI)[ -:]?skub", "i"), 18 + ignoredDIDs: [ 19 + "did:plc:example", //Parody account 20 + ], 21 + }, 22 + { 23 + label: "skub-adjacent", 24 + comment: "skub-adjacent language found in profile", 25 + description: true, 26 + displayName: true, 27 + reportOnly: true, 28 + commentOnly: false, 29 + check: new RegExp( 30 + "skubbe", 31 + "i", 32 + ), 33 + }, 34 + ]; 35 + 36 + export const HANDLE_CHECKS: Checks[] = [ 37 + { 38 + label: "skub", 39 + comment: "Pro-skub language found in handle", 40 + reportOnly: false, 41 + commentOnly: false, 42 + check: new RegExp( 43 + "(only|pro)[-]skub|we love skub|skub[-]?is[-]?(good|god|king)|skub\\.(pro|com|org)", 44 + "i", 45 + ), 46 + }, 47 + ]; 48 + 49 + export const POST_CHECKS: Checks[] = [ 50 + { 51 + label: "pro-skub-link", 52 + comment: "Pro Skub link found in post", 53 + reportOnly: true, 54 + commentOnly: false, 55 + check: new RegExp( 56 + "skubbe\\.com|skub\\.(me|pro|tech)", 57 + "i", 58 + ), 59 + }, 60 + ];
+24
src/developing_checks.md
··· 1 + # How to build checks for skywatch-automod 2 + 3 + ## Introduction 4 + Constants.ts defines three types of types of checks: `HANDLE_CHECKS`, `POST_CHECKS`, and `PROFILE_CHECKS`. 5 + 6 + For each check, users need to define a set of regular expressions that will be used to match against the content of the post, handle, or profile. A maximal example of a check is as follows: 7 + 8 + ```typescript 9 + export const HANDLE_CHECKS: Checks[] = [ 10 + { 11 + label: "example", 12 + comment: "Example found in handle", 13 + description: true, // Optional, only used in handle checks 14 + displayName: true, // Optional, only used in handle checks 15 + reportOnly: false, // it true, the check will only report the content against the account, not label. 16 + commentOnly: false, // Poorly named, if true, will generate an account level comment from flagged posts, rather than a report. Intended for use when reportOnly is false, and on posts only where the flag may generate a high volume of reports.. 17 + check: new RegExp("example", "i"), // Regular expression to match against the content 18 + whitelist: new RegExp("example.com", "i"), // Optional, regular expression to whitelist content 19 + ignoredDIDs: ["did:plc:example"] // Optional, array of DIDs to ignore if they match the check. Useful for folks who reclaim words. 20 + } 21 + ]; 22 + ``` 23 + 24 + In the above example, any handle that contains the word "example" will be labeled with the label "example" unless the handle is `example.com` or the handle belongs to the user with the DID `did:plc:example`.
+11
src/limits.ts
··· 1 + import { pRateLimit } from "p-ratelimit"; // TypeScript 2 + 3 + // create a rate limiter that allows up to 30 API calls per second, 4 + // with max concurrency of 10 5 + 6 + export const limit = pRateLimit({ 7 + interval: 30000, // 1000 ms == 1 second 8 + rate: 280, // 30 API calls per interval 9 + concurrency: 48, // no more than 10 running at once 10 + maxDelay: 0, // an API call delayed > 30 sec is rejected 11 + });
+40
src/lists.ts
··· 1 + import { List } from "./types.js"; 2 + 3 + export const LISTS: List[] = [ 4 + { 5 + label: "troll", 6 + rkey: "3lbckxhgu3r2v", 7 + }, 8 + { 9 + label: "maga-trump", 10 + rkey: "3l53cjwlt4o2s", 11 + }, 12 + { 13 + label: "elon-musk", 14 + rkey: "3l72tte74wa2m", 15 + }, 16 + { 17 + label: "rmve-imve", 18 + rkey: "3l6tfurf7li27", 19 + }, 20 + { 21 + label: "nazi-symbolism", 22 + rkey: "3l6vdudxgeb2z", 23 + }, 24 + { 25 + label: "hammer-sickle", 26 + rkey: "3l4ue6w2aur2v", 27 + }, 28 + { 29 + label: "inverted-red-triangle", 30 + rkey: "3l4ueabtpec2a", 31 + }, 32 + { 33 + label: "automated-reply-guy", 34 + rkey: "3lch7qbvzpx23", 35 + }, 36 + { 37 + label: "terf-gc", 38 + rkey: "3lcqjqjdejs2x", 39 + }, 40 + ];
+19
src/logger.ts
··· 1 + import { pino } from "pino"; 2 + 3 + const logger = pino({ 4 + level: process.env.LOG_LEVEL ?? "info", 5 + transport: 6 + process.env.NODE_ENV !== "production" 7 + ? { 8 + target: "pino-pretty", 9 + options: { 10 + colorize: true, 11 + translateTime: "SYS:standard", 12 + ignore: "pid,hostname", 13 + }, 14 + } 15 + : undefined, 16 + timestamp: pino.stdTimeFunctions.isoTime, 17 + }); 18 + 19 + export default logger;
+190
src/main.ts
··· 1 + import { 2 + CommitCreateEvent, 3 + CommitUpdateEvent, 4 + IdentityEvent, 5 + Jetstream, 6 + } from "@skyware/jetstream"; 7 + import fs from "node:fs"; 8 + 9 + import { 10 + CURSOR_UPDATE_INTERVAL, 11 + FIREHOSE_URL, 12 + METRICS_PORT, 13 + WANTED_COLLECTION, 14 + } from "./config.js"; 15 + import logger from "./logger.js"; 16 + import { startMetricsServer } from "./metrics.js"; 17 + import { Post, LinkFeature, Handle } from "./types.js"; 18 + import { checkPosts } from "./checkPosts.js"; 19 + import { checkHandle } from "./checkHandles.js"; 20 + import { checkProfile } from "./checkProfiles.js"; 21 + 22 + let cursor = 0; 23 + let cursorUpdateInterval: NodeJS.Timeout; 24 + 25 + function epochUsToDateTime(cursor: number): string { 26 + return new Date(cursor / 1000).toISOString(); 27 + } 28 + 29 + try { 30 + logger.info("Trying to read cursor from cursor.txt..."); 31 + cursor = Number(fs.readFileSync("cursor.txt", "utf8")); 32 + logger.info(`Cursor found: ${cursor} (${epochUsToDateTime(cursor)})`); 33 + } catch (error) { 34 + if (error instanceof Error && "code" in error && error.code === "ENOENT") { 35 + cursor = Math.floor(Date.now() * 1000); 36 + logger.info( 37 + `Cursor not found in cursor.txt, setting cursor to: ${cursor} (${epochUsToDateTime(cursor)})`, 38 + ); 39 + fs.writeFileSync("cursor.txt", cursor.toString(), "utf8"); 40 + } else { 41 + logger.error(error); 42 + process.exit(1); 43 + } 44 + } 45 + 46 + const jetstream = new Jetstream({ 47 + wantedCollections: WANTED_COLLECTION, 48 + endpoint: FIREHOSE_URL, 49 + cursor: cursor, 50 + }); 51 + 52 + jetstream.on("open", () => { 53 + logger.info( 54 + `Connected to Jetstream at ${FIREHOSE_URL} with cursor ${jetstream.cursor} (${epochUsToDateTime(jetstream.cursor!)})`, 55 + ); 56 + cursorUpdateInterval = setInterval(() => { 57 + if (jetstream.cursor) { 58 + logger.info( 59 + `Cursor updated to: ${jetstream.cursor} (${epochUsToDateTime(jetstream.cursor)})`, 60 + ); 61 + fs.writeFile("cursor.txt", jetstream.cursor.toString(), (err) => { 62 + if (err) logger.error(err); 63 + }); 64 + } 65 + }, CURSOR_UPDATE_INTERVAL); 66 + }); 67 + 68 + jetstream.on("close", () => { 69 + clearInterval(cursorUpdateInterval); 70 + logger.info("Jetstream connection closed."); 71 + }); 72 + 73 + jetstream.on("error", (error) => { 74 + logger.error(`Jetstream error: ${error.message}`); 75 + }); 76 + 77 + // Check for post updates 78 + 79 + jetstream.onCreate( 80 + "app.bsky.feed.post", 81 + (event: CommitCreateEvent<typeof WANTED_COLLECTION>) => { 82 + const atURI = `at://${event.did}/app.bsky.feed.post/${event.commit.rkey}`; 83 + const hasFacets = event.commit.record.hasOwnProperty("facets"); 84 + const hasText = event.commit.record.hasOwnProperty("text"); 85 + 86 + const tasks: Promise<void>[] = []; 87 + 88 + // Check if the record has facets 89 + if (hasFacets) { 90 + const hasLinkType = event.commit.record.facets!.some((facet) => 91 + facet.features.some( 92 + (feature) => feature.$type === "app.bsky.richtext.facet#link", 93 + ), 94 + ); 95 + 96 + if (hasLinkType) { 97 + const urls = event.commit.record 98 + .facets!.flatMap((facet) => 99 + facet.features.filter( 100 + (feature) => feature.$type === "app.bsky.richtext.facet#link", 101 + ), 102 + ) 103 + .map((feature: LinkFeature) => feature.uri); 104 + 105 + urls.forEach((url) => { 106 + const posts: Post[] = [ 107 + { 108 + did: event.did, 109 + time: event.time_us, 110 + rkey: event.commit.rkey, 111 + atURI: atURI, 112 + text: url, 113 + cid: event.commit.cid, 114 + }, 115 + ]; 116 + tasks.push(checkPosts(posts)); 117 + }); 118 + } 119 + } else if (hasText) { 120 + const posts: Post[] = [ 121 + { 122 + did: event.did, 123 + time: event.time_us, 124 + rkey: event.commit.rkey, 125 + atURI: atURI, 126 + text: event.commit.record.text, 127 + cid: event.commit.cid, 128 + }, 129 + ]; 130 + tasks.push(checkPosts(posts)); 131 + } 132 + }, 133 + ); 134 + 135 + // Check for profile updates 136 + jetstream.onUpdate( 137 + "app.bsky.actor.profile", 138 + (event: CommitUpdateEvent<typeof WANTED_COLLECTION>) => { 139 + try { 140 + const ret = checkProfile( 141 + event.did, 142 + event.time_us, 143 + event.commit.record.displayName, 144 + event.commit.record.description, 145 + ); 146 + } catch (error) { 147 + logger.error(`Error checking profile: ${error}`); 148 + } 149 + }, 150 + ); 151 + 152 + // Check for handle updates 153 + jetstream.on("identity", async (event: IdentityEvent) => { 154 + const handle: Handle[] = [ 155 + { did: event.did, handle: event.identity.handle, time: event.time_us }, 156 + ]; 157 + 158 + try { 159 + const ret = await checkHandle(handle); 160 + } catch (error) { 161 + logger.error(`Error checking handle: ${error}`); 162 + } 163 + }); 164 + 165 + const metricsServer = startMetricsServer(METRICS_PORT); 166 + 167 + /* labelerServer.app.listen({ port: PORT, host: HOST }, (error, address) => { 168 + if (error) { 169 + logger.error("Error starting server: %s", error); 170 + } else { 171 + logger.info(`Labeler server listening on ${address}`); 172 + } 173 + });*/ 174 + 175 + jetstream.start(); 176 + 177 + function shutdown() { 178 + try { 179 + logger.info("Shutting down gracefully..."); 180 + fs.writeFileSync("cursor.txt", jetstream.cursor!.toString(), "utf8"); 181 + jetstream.close(); 182 + metricsServer.close(); 183 + } catch (error) { 184 + logger.error(`Error shutting down gracefully: ${error}`); 185 + process.exit(1); 186 + } 187 + } 188 + 189 + process.on("SIGINT", shutdown); 190 + process.on("SIGTERM", shutdown);
+28
src/metrics.ts
··· 1 + import express from "express"; 2 + import { Registry, collectDefaultMetrics } from "prom-client"; 3 + 4 + import logger from "./logger.js"; 5 + 6 + const register = new Registry(); 7 + collectDefaultMetrics({ register }); 8 + 9 + const app = express(); 10 + 11 + app.get("/metrics", (req, res) => { 12 + register 13 + .metrics() 14 + .then((metrics) => { 15 + res.set("Content-Type", register.contentType); 16 + res.send(metrics); 17 + }) 18 + .catch((ex: unknown) => { 19 + logger.error(`Error serving metrics: ${(ex as Error).message}`); 20 + res.status(500).end((ex as Error).message); 21 + }); 22 + }); 23 + 24 + export const startMetricsServer = (port: number, host = "127.0.0.1") => { 25 + return app.listen(port, host, () => { 26 + logger.info(`Metrics server is listening on ${host}:${port}`); 27 + }); 28 + };
+187
src/moderation.ts
··· 1 + import { agent, isLoggedIn } from "./agent.js"; 2 + import { MOD_DID } from "./config.js"; 3 + import { limit } from "./limits.js"; 4 + import logger from "./logger.js"; 5 + import { LISTS } from "./lists.js"; 6 + 7 + export const createPostLabel = async ( 8 + uri: string, 9 + cid: string, 10 + label: string, 11 + comment: string, 12 + ) => { 13 + await isLoggedIn; 14 + await limit(async () => { 15 + try { 16 + return agent.tools.ozone.moderation.emitEvent( 17 + { 18 + event: { 19 + $type: "tools.ozone.moderation.defs#modEventLabel", 20 + comment: comment, 21 + createLabelVals: [label], 22 + negateLabelVals: [], 23 + }, 24 + // specify the labeled post by strongRef 25 + subject: { 26 + $type: "com.atproto.repo.strongRef", 27 + uri: uri, 28 + cid: cid, 29 + }, 30 + // put in the rest of the metadata 31 + createdBy: `${agent.did}`, 32 + createdAt: new Date().toISOString(), 33 + }, 34 + { 35 + encoding: "application/json", 36 + headers: { 37 + "atproto-proxy": `${MOD_DID!}#atproto_labeler`, 38 + "atproto-accept-labelers": 39 + "did:plc:ar7c4by46qjdydhdevvrndac;redact", 40 + }, 41 + }, 42 + ); 43 + } catch (e) { 44 + console.error(e); 45 + } 46 + }); 47 + }; 48 + 49 + export const createAccountLabel = async ( 50 + did: string, 51 + label: string, 52 + comment: string, 53 + ) => { 54 + await isLoggedIn; 55 + await limit(async () => { 56 + try { 57 + await agent.tools.ozone.moderation.emitEvent( 58 + { 59 + event: { 60 + $type: "tools.ozone.moderation.defs#modEventLabel", 61 + comment: comment, 62 + createLabelVals: [label], 63 + negateLabelVals: [], 64 + }, 65 + // specify the labeled post by strongRef 66 + subject: { 67 + $type: "com.atproto.admin.defs#repoRef", 68 + did: did, 69 + }, 70 + // put in the rest of the metadata 71 + createdBy: `${agent.did}`, 72 + createdAt: new Date().toISOString(), 73 + }, 74 + { 75 + encoding: "application/json", 76 + headers: { 77 + "atproto-proxy": `${MOD_DID!}#atproto_labeler`, 78 + "atproto-accept-labelers": 79 + "did:plc:ar7c4by46qjdydhdevvrndac;redact", 80 + }, 81 + }, 82 + ); 83 + } catch (e) { 84 + console.error(e); 85 + } 86 + }); 87 + }; 88 + 89 + export const createAccountComment = async (did: string, comment: string) => { 90 + await isLoggedIn; 91 + await limit(async () => { 92 + try { 93 + await agent.tools.ozone.moderation.emitEvent( 94 + { 95 + event: { 96 + $type: "tools.ozone.moderation.defs#modEventComment", 97 + comment: comment, 98 + }, 99 + // specify the labeled post by strongRef 100 + subject: { 101 + $type: "com.atproto.admin.defs#repoRef", 102 + did: did, 103 + }, 104 + // put in the rest of the metadata 105 + createdBy: `${agent.did}`, 106 + createdAt: new Date().toISOString(), 107 + }, 108 + { 109 + encoding: "application/json", 110 + headers: { 111 + "atproto-proxy": `${MOD_DID!}#atproto_labeler`, 112 + "atproto-accept-labelers": 113 + "did:plc:ar7c4by46qjdydhdevvrndac;redact", 114 + }, 115 + }, 116 + ); 117 + } catch (e) { 118 + console.error(e); 119 + } 120 + }); 121 + }; 122 + 123 + export const createAccountReport = async (did: string, comment: string) => { 124 + await isLoggedIn; 125 + await limit(async () => { 126 + try { 127 + await agent.tools.ozone.moderation.emitEvent( 128 + { 129 + event: { 130 + $type: "tools.ozone.moderation.defs#modEventReport", 131 + comment: comment, 132 + reportType: "com.atproto.moderation.defs#reasonOther", 133 + }, 134 + // specify the labeled post by strongRef 135 + subject: { 136 + $type: "com.atproto.admin.defs#repoRef", 137 + did: did, 138 + }, 139 + // put in the rest of the metadata 140 + createdBy: `${agent.did}`, 141 + createdAt: new Date().toISOString(), 142 + }, 143 + { 144 + encoding: "application/json", 145 + headers: { 146 + "atproto-proxy": `${MOD_DID!}#atproto_labeler`, 147 + "atproto-accept-labelers": 148 + "did:plc:ar7c4by46qjdydhdevvrndac;redact", 149 + }, 150 + }, 151 + ); 152 + } catch (e) { 153 + console.error(e); 154 + } 155 + }); 156 + }; 157 + 158 + export const addToList = async (label: string, did: string) => { 159 + await isLoggedIn; 160 + 161 + const newList = LISTS.find((list) => list.label === label); 162 + if (!newList) { 163 + logger.warn( 164 + `List not found for ${label}. Likely a label not associated with a list`, 165 + ); 166 + return; 167 + } 168 + logger.info(`New label added to list: ${newList.label}`); 169 + 170 + const listUri = `at://${MOD_DID!}/app.bsky.graph.list/${newList.rkey}`; 171 + 172 + await limit(async () => { 173 + try { 174 + await agent.com.atproto.repo.createRecord({ 175 + collection: "app.bsky.graph.listitem", 176 + repo: `${MOD_DID!}`, 177 + record: { 178 + subject: did, 179 + list: listUri, 180 + createdAt: new Date().toISOString(), 181 + }, 182 + }); 183 + } catch (e) { 184 + console.error(e); 185 + } 186 + }); 187 + };
+44
src/types.ts
··· 1 + export interface Checks { 2 + label: string; 3 + comment: string; 4 + description?: boolean; 5 + displayName?: boolean; 6 + reportOnly: boolean; 7 + commentOnly: boolean; 8 + check: RegExp; 9 + whitelist?: RegExp; 10 + ignoredDIDs?: string[]; 11 + } 12 + 13 + export interface Post { 14 + did: string; 15 + time: number; 16 + rkey: string; 17 + atURI: string; 18 + text: string; 19 + cid: string; 20 + } 21 + 22 + export interface Handle { 23 + did: string; 24 + time: number; 25 + handle: string; 26 + } 27 + 28 + export interface Profile { 29 + did: string; 30 + time: number; 31 + displayName?: string; 32 + description?: string; 33 + } 34 + 35 + // Define the type for the link feature 36 + export interface LinkFeature { 37 + $type: "app.bsky.richtext.facet#link"; 38 + uri: string; 39 + } 40 + 41 + export interface List { 42 + label: string; 43 + rkey: string; 44 + }
+33
src/utils.ts
··· 1 + /* Normalize the Unicode characters: this doesn't consistently work yet, there is something about certain bluesky strings that causes it to fail. */ 2 + export function normalizeUnicode(text: string): string { 3 + // First decompose the characters (NFD) 4 + const decomposed = text.normalize("NFD"); 5 + 6 + // Remove diacritics and combining marks 7 + const withoutDiacritics = decomposed.replace(/[\u0300-\u036f]/g, ""); 8 + 9 + // Remove mathematical alphanumeric symbols 10 + const withoutMath = withoutDiacritics.replace( 11 + /[\uD835][\uDC00-\uDFFF]/g, 12 + (char) => { 13 + // Get the base character from the mathematical symbol 14 + const code = char.codePointAt(0); 15 + if (code >= 0x1d400 && code <= 0x1d433) 16 + // Mathematical bold 17 + return String.fromCharCode(code - 0x1d400 + 0x41); 18 + if (code >= 0x1d434 && code <= 0x1d467) 19 + // Mathematical italic 20 + return String.fromCharCode(code - 0x1d434 + 0x61); 21 + if (code >= 0x1d468 && code <= 0x1d49b) 22 + // Mathematical bold italic 23 + return String.fromCharCode(code - 0x1d468 + 0x41); 24 + if (code >= 0x1d49c && code <= 0x1d4cf) 25 + // Mathematical script 26 + return String.fromCharCode(code - 0x1d49c + 0x61); 27 + return char; 28 + }, 29 + ); 30 + 31 + // Final NFKC normalization to handle any remaining special characters 32 + return withoutMath.normalize("NFKC"); 33 + }
+11
tsconfig.json
··· 1 + { 2 + "compilerOptions": { 3 + "strict": true, 4 + "target": "ESNext", 5 + "module": "NodeNext", 6 + "moduleResolution": "NodeNext", 7 + "allowSyntheticDefaultImports": true, 8 + "esModuleInterop": true, 9 + "types": ["node"] 10 + } 11 + }