+12
.env.example
+12
.env.example
···
···
1
+
MOD_DID=did:plc:xxx
2
+
OZONE_URL=ozone.example.xyz
3
+
OZONE_PDS=mushroom.us-region.host.bsky.network
4
+
BSKY_HANDLE=mod.example.xyz
5
+
BSKY_PASSWORD=xxxx
6
+
HOST=127.0.0.1
7
+
PORT=4000 // Not currently used
8
+
METRICS_PORT=4001
9
+
FIREHOSE_URL=wss://jetstream1.us-east.bsky.network/subscribe
10
+
CURSOR_UPDATE_INTERVAL=10000
11
+
LABEL_LIMIT=2900 * 1000
12
+
LABEL_LIMIT_WAIT=300 * 1000
+7
.gitignore
+7
.gitignore
+20
Dockerfile
+20
Dockerfile
···
···
1
+
# Description: Dockerfile for the Skywatch Tools
2
+
FROM node:lts
3
+
RUN curl -fsSL https://bun.sh/install | bash
4
+
ENV PATH="/root/.bun/bin:${PATH}"
5
+
6
+
# Create app directory
7
+
WORKDIR /app
8
+
COPY package*.json bun.lockb ./
9
+
10
+
# Install app dependencies
11
+
RUN bun i
12
+
13
+
# Bundle app source
14
+
COPY . .
15
+
16
+
# Expose the port the app runs
17
+
EXPOSE 4101
18
+
19
+
# Serve the app
20
+
CMD ["bun", "run", "start"]
+17
LICENSE
+17
LICENSE
···
···
1
+
Permission is hereby granted, free of charge, to any person obtaining a copy
2
+
of this software and associated documentation files (the "Software"), to deal
3
+
in the Software without restriction, including without limitation the rights
4
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
5
+
copies of the Software, and to permit persons to whom the Software is
6
+
furnished to do so, subject to the following conditions:
7
+
8
+
The above copyright notice and this permission notice shall be included in all
9
+
copies or substantial portions of the Software.
10
+
11
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
12
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
13
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
14
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
15
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
16
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
17
+
SOFTWARE.
+42
README.md
+42
README.md
···
···
1
+
# skywatch-tools
2
+
3
+
This is a rewrite of the original skywatch-tools project in TypeScript. The original project was written in Bash. The purpose of this project is to automate the moderation by the Bluesky independent labeler skywatch.blue
4
+
5
+
## Installation and Setup
6
+
7
+
To install dependencies:
8
+
9
+
```bash
10
+
bun i
11
+
```
12
+
13
+
Modify .env.example with your own values and rename it to .env
14
+
15
+
```bash
16
+
bun run start
17
+
```
18
+
19
+
To run in docker:
20
+
21
+
```bash
22
+
docker build -pull -t skywatch-automod .
23
+
docker run -d -p 4101:4101 skywatch-automod
24
+
```
25
+
26
+
## Brief overview
27
+
28
+
Currently this tooling does one thing. It monitors the bluesky firehose and analyzes content for phrases which fit Skywatch's criteria for moderation. If the criteria is met, it can automatically label the content with the appropriate label.
29
+
30
+
In certain cases, where regexp will create too many false positives, it will flag content as a report against related to the account, so that it can be reviewed later.
31
+
32
+
For information on how to set-up your own checks, please see the [developing_checks.md](./src/developing_checks.md) file.
33
+
34
+
_TODO_:
35
+
36
+
- [ ] Remove unused types
37
+
- [ ] Update the types needed to be more specific to the checks rather than bluesky content types
38
+
- [ ] Consider how to write directly to OzonePDS database rather than using the API. May require running the same instance as Ozone to allow for direct database access.
39
+
- [ ] Add compose.yaml for easy deployment
40
+
- [ ] Make the metrics server work (or remove it)
41
+
42
+
Create a seperate program to watch OZONE_PDS firehose labels, and update the lists as needed. This will remove dependency on broken ruby tools created by aegis.
bun.lockb
bun.lockb
This is a binary file and will not be displayed.
+50
package.json
+50
package.json
···
···
1
+
{
2
+
"name": "skywatch-automod",
3
+
"version": "1.0.0",
4
+
"type": "module",
5
+
"scripts": {
6
+
"start": "npx tsx src/main.ts",
7
+
"dev": "npx tsx --watch src/main.ts",
8
+
"format": "bunx prettier --write .",
9
+
"lint": "bunx eslint .",
10
+
"lint:fix": "bunx eslint --fix .",
11
+
"prepare": "bunx husky install"
12
+
},
13
+
"lint-staged": {
14
+
"*": "prettier --ignore-unknown --write"
15
+
},
16
+
"devDependencies": {
17
+
"@eslint/js": "^9.15.0",
18
+
"@trivago/prettier-plugin-sort-imports": "^4.3.0",
19
+
"@types/better-sqlite3": "^7.6.12",
20
+
"@types/eslint__js": "^8.42.3",
21
+
"@types/express": "^4.17.21",
22
+
"@types/node": "^22.9.1",
23
+
"eslint": "^9.15.0",
24
+
"prettier": "^3.3.3",
25
+
"tsx": "^4.19.2",
26
+
"typescript": "^5.6.3",
27
+
"typescript-eslint": "^8.15.0"
28
+
},
29
+
"dependencies": {
30
+
"@atproto/api": "^0.13.23",
31
+
"@atproto/bsky": "^0.0.101",
32
+
"@atproto/lexicon": "^0.4.4",
33
+
"@atproto/ozone": "^0.1.62",
34
+
"@atproto/repo": "^0.6.0",
35
+
"@atproto/xrpc-server": "^0.7.4",
36
+
"@skyware/bot": "^0.3.7",
37
+
"@skyware/jetstream": "^0.2.0",
38
+
"@skyware/labeler": "^0.1.13",
39
+
"bottleneck": "^2.19.5",
40
+
"dotenv": "^16.4.5",
41
+
"express": "^4.21.1",
42
+
"husky": "^9.1.7",
43
+
"lint-staged": "^15.2.10",
44
+
"p-ratelimit": "^1.0.1",
45
+
"pino": "^9.5.0",
46
+
"pino-pretty": "^13.0.0",
47
+
"prom-client": "^15.1.3",
48
+
"undici": "^7.2.0"
49
+
}
50
+
}
+17
src/agent.ts
+17
src/agent.ts
···
···
1
+
import { setGlobalDispatcher, Agent as Agent } from "undici";
2
+
setGlobalDispatcher(new Agent({ connect: { timeout: 20_000 } }));
3
+
import { BSKY_HANDLE, BSKY_PASSWORD, OZONE_PDS } from "./config.js";
4
+
import { AtpAgent } from "@atproto/api";
5
+
6
+
export const agent = new AtpAgent({
7
+
service: `https://${OZONE_PDS}`,
8
+
});
9
+
export const login = () =>
10
+
agent.login({
11
+
identifier: BSKY_HANDLE,
12
+
password: BSKY_PASSWORD,
13
+
});
14
+
15
+
export const isLoggedIn = login()
16
+
.then(() => true)
17
+
.catch(() => false);
+56
src/checkHandles.ts
+56
src/checkHandles.ts
···
···
1
+
import { HANDLE_CHECKS } from "./constants.js";
2
+
import logger from "./logger.js";
3
+
import { Handle } from "./types.js";
4
+
import {
5
+
createAccountReport,
6
+
createAccountLabel,
7
+
createAccountComment,
8
+
} from "./moderation.js";
9
+
10
+
export const checkHandle = async (handle: Handle[]) => {
11
+
// Get a list of labels
12
+
const labels: string[] = Array.from(
13
+
HANDLE_CHECKS,
14
+
(handleCheck) => handleCheck.label,
15
+
);
16
+
17
+
// iterate through the labels
18
+
labels.forEach((label) => {
19
+
const checkList = HANDLE_CHECKS.find(
20
+
(handleCheck) => handleCheck.label === label,
21
+
);
22
+
23
+
if (checkList?.ignoredDIDs) {
24
+
if (checkList.ignoredDIDs.includes(handle[0].did)) {
25
+
return logger.info(`Whitelisted DID: ${handle[0].did}`);
26
+
}
27
+
} else {
28
+
if (checkList!.check.test(handle[0].handle)) {
29
+
if (checkList?.whitelist) {
30
+
// False-positive checks
31
+
if (checkList?.whitelist.test(handle[0].handle)) {
32
+
logger.info(`Whitelisted phrase found for: ${handle[0].handle}`);
33
+
return;
34
+
}
35
+
} else {
36
+
logger.info(`${checkList!.label} in handle: ${handle[0].handle}`);
37
+
}
38
+
39
+
if (checkList?.reportOnly === true) {
40
+
logger.info(`Report only: ${handle[0].handle}`);
41
+
createAccountReport(
42
+
handle[0].did,
43
+
`${handle[0].time}: ${checkList!.comment} - ${handle[0].handle}`,
44
+
);
45
+
return;
46
+
} else {
47
+
createAccountLabel(
48
+
handle[0].did,
49
+
`${checkList!.label}`,
50
+
`${handle[0].time}: ${checkList!.comment} - ${handle[0].handle}`,
51
+
);
52
+
}
53
+
}
54
+
}
55
+
});
56
+
};
+80
src/checkPosts.ts
+80
src/checkPosts.ts
···
···
1
+
import { POST_CHECKS } from "./constants.js";
2
+
import { Post } from "./types.js";
3
+
import logger from "./logger.js";
4
+
import {
5
+
createPostLabel,
6
+
createAccountReport,
7
+
createAccountComment,
8
+
} from "./moderation.js";
9
+
10
+
export const checkPosts = async (post: Post[]) => {
11
+
// Get a list of labels
12
+
const labels: string[] = Array.from(
13
+
POST_CHECKS,
14
+
(postCheck) => postCheck.label,
15
+
);
16
+
17
+
// Destructure Post object
18
+
const { did, time, atURI, text, cid } = post[0];
19
+
20
+
// iterate through the labels
21
+
labels.forEach((label) => {
22
+
const checkPost = POST_CHECKS.find(
23
+
(postCheck) => postCheck.label === label,
24
+
);
25
+
26
+
if (checkPost?.ignoredDIDs) {
27
+
if (checkPost.ignoredDIDs.includes(did)) {
28
+
return logger.info(`Whitelisted DID: ${did}`);
29
+
}
30
+
} else {
31
+
if (checkPost!.check.test(text)) {
32
+
if (checkPost?.whitelist) {
33
+
if (checkPost?.whitelist.test(text)) {
34
+
logger.info(`Whitelisted phrase found"`);
35
+
return;
36
+
}
37
+
} else {
38
+
logger.info(`${checkPost!.label} in post at ${atURI}`);
39
+
40
+
if (checkPost!.reportOnly === true) {
41
+
logger.info(`Report only: ${did}`);
42
+
createAccountReport(
43
+
did,
44
+
`${time}: ${checkPost?.comment} at ${atURI} with text "${text}"`,
45
+
);
46
+
return;
47
+
} else {
48
+
logger.info(`Labeling post: ${atURI}`);
49
+
50
+
createPostLabel(
51
+
post[0].atURI,
52
+
post[0].cid,
53
+
`${checkPost!.label}`,
54
+
`${post[0].time}: ${checkPost!.comment} at ${post[0].atURI} with text "${post[0].text}"`,
55
+
);
56
+
57
+
if (checkPost!.commentOnly === true) {
58
+
logger.info(`Comment only: ${post[0].did}`);
59
+
createAccountComment(
60
+
post[0].did,
61
+
`${post[0].time}: ${checkPost?.comment} at ${post[0].atURI} with text "${post[0].text}"`,
62
+
);
63
+
return;
64
+
} else if (checkPost?.label === "fundraising-link") {
65
+
return; // skip fundraising links—hardcoded because of the insane volume by spammers.
66
+
} else if (checkPost!.commentOnly === false) {
67
+
logger.info(
68
+
`Creating report for post ${post[0].atURI} on ${post[0].did}`,
69
+
);
70
+
createAccountReport(
71
+
post[0].did,
72
+
` ${post[0].time}: ${checkPost!.comment} at ${post[0].atURI} with text "${post[0].text}"`,
73
+
);
74
+
}
75
+
}
76
+
}
77
+
}
78
+
}
79
+
});
80
+
};
+82
src/checkProfiles.ts
+82
src/checkProfiles.ts
···
···
1
+
import { PROFILE_CHECKS } from "./constants.js";
2
+
import logger from "./logger.js";
3
+
import {
4
+
createAccountReport,
5
+
createAccountLabel,
6
+
createAccountComment,
7
+
} from "./moderation.js";
8
+
9
+
export const checkProfile = async (
10
+
did: string,
11
+
time: number,
12
+
displayName: string,
13
+
description: string,
14
+
) => {
15
+
// Get a list of labels
16
+
const labels: string[] = Array.from(
17
+
PROFILE_CHECKS,
18
+
(profileCheck) => profileCheck.label,
19
+
);
20
+
21
+
// iterate through the labels
22
+
labels.forEach((label) => {
23
+
const checkProfiles = PROFILE_CHECKS.find(
24
+
(profileCheck) => profileCheck.label === label,
25
+
);
26
+
27
+
// Check if DID is whitelisted
28
+
if (checkProfiles?.ignoredDIDs) {
29
+
if (checkProfiles.ignoredDIDs.includes(did)) {
30
+
return logger.info(`Whitelisted DID: ${did}`);
31
+
}
32
+
} else {
33
+
let checkCount: number = 0; // Counter for checking if any checks are found
34
+
35
+
// Check if description is enabled
36
+
if (checkProfiles?.description === true) {
37
+
if (checkProfiles!.check.test(description)) {
38
+
if (checkProfiles?.whitelist) {
39
+
if (checkProfiles?.whitelist.test(description)) {
40
+
logger.info(`Whitelisted phrase found.`);
41
+
}
42
+
} else {
43
+
logger.info(`${checkProfiles!.label} in description.`);
44
+
checkCount++;
45
+
}
46
+
}
47
+
}
48
+
49
+
if (checkProfiles?.displayName === true) {
50
+
if (checkProfiles!.check.test(displayName)) {
51
+
if (checkProfiles?.whitelist) {
52
+
if (checkProfiles?.whitelist.test(displayName)) {
53
+
logger.info(`Whitelisted phrase found for: ${displayName}`);
54
+
}
55
+
} else {
56
+
logger.info(
57
+
`${checkProfiles!.label} in display name: ${displayName}`,
58
+
);
59
+
checkCount++;
60
+
}
61
+
}
62
+
}
63
+
64
+
if (checkCount === 0) return;
65
+
66
+
if (checkProfiles.reportOnly === true) {
67
+
logger.info(`Report only: ${did}`);
68
+
createAccountReport(
69
+
did,
70
+
`${time}: ${checkProfiles!.comment} - ${displayName} - ${description}`,
71
+
);
72
+
return;
73
+
} else {
74
+
createAccountLabel(
75
+
did,
76
+
`${checkProfiles!.label}`,
77
+
`${time}: ${checkProfiles!.comment}`,
78
+
);
79
+
}
80
+
}
81
+
});
82
+
};
+24
src/config.ts
+24
src/config.ts
···
···
1
+
import "dotenv/config";
2
+
3
+
export const MOD_DID = process.env.DID ?? "";
4
+
export const OZONE_URL = process.env.OZONE_URL ?? "";
5
+
export const OZONE_PDS = process.env.OZONE_PDS ?? "";
6
+
export const BSKY_HANDLE = process.env.BSKY_HANDLE ?? "";
7
+
export const BSKY_PASSWORD = process.env.BSKY_PASSWORD ?? "";
8
+
export const HOST = process.env.HOST ?? "127.0.0.1";
9
+
export const PORT = process.env.PORT ? Number(process.env.PORT) : 4100;
10
+
export const METRICS_PORT = process.env.METRICS_PORT
11
+
? Number(process.env.METRICS_PORT)
12
+
: 4101; // Left this intact from the code I adapted this from
13
+
export const FIREHOSE_URL =
14
+
process.env.FIREHOSE_URL ?? "wss://jetstream.atproto.tools/subscribe";
15
+
export const WANTED_COLLECTION = [
16
+
"app.bsky.feed.post",
17
+
"app.bsky.actor.defs",
18
+
"app.bsky.actor.profile",
19
+
];
20
+
export const CURSOR_UPDATE_INTERVAL = process.env.CURSOR_UPDATE_INTERVAL
21
+
? Number(process.env.CURSOR_UPDATE_INTERVAL)
22
+
: 60000;
23
+
export const LABEL_LIMIT = process.env.LABEL_LIMIT;
24
+
export const LABEL_LIMIT_WAIT = process.env.LABEL_LIMIT_WAIT;
+60
src/constants.ts.example
+60
src/constants.ts.example
···
···
1
+
import { Checks } from "./types.js";
2
+
3
+
// rename this to constants.ts
4
+
5
+
export const PROFILE_CHECKS: Checks[] = [
6
+
{
7
+
label: "skub",
8
+
comment: "Pro-skub language found in profile",
9
+
description: true,
10
+
displayName: true,
11
+
reportOnly: false,
12
+
commentOnly: false,
13
+
check: new RegExp(
14
+
"(only|pro)[ -]skub|we love skub|skub is (good|god|king)|\\bskub\\b",
15
+
"i",
16
+
),
17
+
whitelist: new RegExp("(anti|[🚫]|DNI)[ -:]?skub", "i"),
18
+
ignoredDIDs: [
19
+
"did:plc:example", //Parody account
20
+
],
21
+
},
22
+
{
23
+
label: "skub-adjacent",
24
+
comment: "skub-adjacent language found in profile",
25
+
description: true,
26
+
displayName: true,
27
+
reportOnly: true,
28
+
commentOnly: false,
29
+
check: new RegExp(
30
+
"skubbe",
31
+
"i",
32
+
),
33
+
},
34
+
];
35
+
36
+
export const HANDLE_CHECKS: Checks[] = [
37
+
{
38
+
label: "skub",
39
+
comment: "Pro-skub language found in handle",
40
+
reportOnly: false,
41
+
commentOnly: false,
42
+
check: new RegExp(
43
+
"(only|pro)[-]skub|we love skub|skub[-]?is[-]?(good|god|king)|skub\\.(pro|com|org)",
44
+
"i",
45
+
),
46
+
},
47
+
];
48
+
49
+
export const POST_CHECKS: Checks[] = [
50
+
{
51
+
label: "pro-skub-link",
52
+
comment: "Pro Skub link found in post",
53
+
reportOnly: true,
54
+
commentOnly: false,
55
+
check: new RegExp(
56
+
"skubbe\\.com|skub\\.(me|pro|tech)",
57
+
"i",
58
+
),
59
+
},
60
+
];
+24
src/developing_checks.md
+24
src/developing_checks.md
···
···
1
+
# How to build checks for skywatch-automod
2
+
3
+
## Introduction
4
+
Constants.ts defines three types of types of checks: `HANDLE_CHECKS`, `POST_CHECKS`, and `PROFILE_CHECKS`.
5
+
6
+
For each check, users need to define a set of regular expressions that will be used to match against the content of the post, handle, or profile. A maximal example of a check is as follows:
7
+
8
+
```typescript
9
+
export const HANDLE_CHECKS: Checks[] = [
10
+
{
11
+
label: "example",
12
+
comment: "Example found in handle",
13
+
description: true, // Optional, only used in handle checks
14
+
displayName: true, // Optional, only used in handle checks
15
+
reportOnly: false, // it true, the check will only report the content against the account, not label.
16
+
commentOnly: false, // Poorly named, if true, will generate an account level comment from flagged posts, rather than a report. Intended for use when reportOnly is false, and on posts only where the flag may generate a high volume of reports..
17
+
check: new RegExp("example", "i"), // Regular expression to match against the content
18
+
whitelist: new RegExp("example.com", "i"), // Optional, regular expression to whitelist content
19
+
ignoredDIDs: ["did:plc:example"] // Optional, array of DIDs to ignore if they match the check. Useful for folks who reclaim words.
20
+
}
21
+
];
22
+
```
23
+
24
+
In the above example, any handle that contains the word "example" will be labeled with the label "example" unless the handle is `example.com` or the handle belongs to the user with the DID `did:plc:example`.
+11
src/limits.ts
+11
src/limits.ts
···
···
1
+
import { pRateLimit } from "p-ratelimit"; // TypeScript
2
+
3
+
// create a rate limiter that allows up to 30 API calls per second,
4
+
// with max concurrency of 10
5
+
6
+
export const limit = pRateLimit({
7
+
interval: 30000, // 1000 ms == 1 second
8
+
rate: 280, // 30 API calls per interval
9
+
concurrency: 48, // no more than 10 running at once
10
+
maxDelay: 0, // an API call delayed > 30 sec is rejected
11
+
});
+40
src/lists.ts
+40
src/lists.ts
···
···
1
+
import { List } from "./types.js";
2
+
3
+
export const LISTS: List[] = [
4
+
{
5
+
label: "troll",
6
+
rkey: "3lbckxhgu3r2v",
7
+
},
8
+
{
9
+
label: "maga-trump",
10
+
rkey: "3l53cjwlt4o2s",
11
+
},
12
+
{
13
+
label: "elon-musk",
14
+
rkey: "3l72tte74wa2m",
15
+
},
16
+
{
17
+
label: "rmve-imve",
18
+
rkey: "3l6tfurf7li27",
19
+
},
20
+
{
21
+
label: "nazi-symbolism",
22
+
rkey: "3l6vdudxgeb2z",
23
+
},
24
+
{
25
+
label: "hammer-sickle",
26
+
rkey: "3l4ue6w2aur2v",
27
+
},
28
+
{
29
+
label: "inverted-red-triangle",
30
+
rkey: "3l4ueabtpec2a",
31
+
},
32
+
{
33
+
label: "automated-reply-guy",
34
+
rkey: "3lch7qbvzpx23",
35
+
},
36
+
{
37
+
label: "terf-gc",
38
+
rkey: "3lcqjqjdejs2x",
39
+
},
40
+
];
+19
src/logger.ts
+19
src/logger.ts
···
···
1
+
import { pino } from "pino";
2
+
3
+
const logger = pino({
4
+
level: process.env.LOG_LEVEL ?? "info",
5
+
transport:
6
+
process.env.NODE_ENV !== "production"
7
+
? {
8
+
target: "pino-pretty",
9
+
options: {
10
+
colorize: true,
11
+
translateTime: "SYS:standard",
12
+
ignore: "pid,hostname",
13
+
},
14
+
}
15
+
: undefined,
16
+
timestamp: pino.stdTimeFunctions.isoTime,
17
+
});
18
+
19
+
export default logger;
+190
src/main.ts
+190
src/main.ts
···
···
1
+
import {
2
+
CommitCreateEvent,
3
+
CommitUpdateEvent,
4
+
IdentityEvent,
5
+
Jetstream,
6
+
} from "@skyware/jetstream";
7
+
import fs from "node:fs";
8
+
9
+
import {
10
+
CURSOR_UPDATE_INTERVAL,
11
+
FIREHOSE_URL,
12
+
METRICS_PORT,
13
+
WANTED_COLLECTION,
14
+
} from "./config.js";
15
+
import logger from "./logger.js";
16
+
import { startMetricsServer } from "./metrics.js";
17
+
import { Post, LinkFeature, Handle } from "./types.js";
18
+
import { checkPosts } from "./checkPosts.js";
19
+
import { checkHandle } from "./checkHandles.js";
20
+
import { checkProfile } from "./checkProfiles.js";
21
+
22
+
let cursor = 0;
23
+
let cursorUpdateInterval: NodeJS.Timeout;
24
+
25
+
function epochUsToDateTime(cursor: number): string {
26
+
return new Date(cursor / 1000).toISOString();
27
+
}
28
+
29
+
try {
30
+
logger.info("Trying to read cursor from cursor.txt...");
31
+
cursor = Number(fs.readFileSync("cursor.txt", "utf8"));
32
+
logger.info(`Cursor found: ${cursor} (${epochUsToDateTime(cursor)})`);
33
+
} catch (error) {
34
+
if (error instanceof Error && "code" in error && error.code === "ENOENT") {
35
+
cursor = Math.floor(Date.now() * 1000);
36
+
logger.info(
37
+
`Cursor not found in cursor.txt, setting cursor to: ${cursor} (${epochUsToDateTime(cursor)})`,
38
+
);
39
+
fs.writeFileSync("cursor.txt", cursor.toString(), "utf8");
40
+
} else {
41
+
logger.error(error);
42
+
process.exit(1);
43
+
}
44
+
}
45
+
46
+
const jetstream = new Jetstream({
47
+
wantedCollections: WANTED_COLLECTION,
48
+
endpoint: FIREHOSE_URL,
49
+
cursor: cursor,
50
+
});
51
+
52
+
jetstream.on("open", () => {
53
+
logger.info(
54
+
`Connected to Jetstream at ${FIREHOSE_URL} with cursor ${jetstream.cursor} (${epochUsToDateTime(jetstream.cursor!)})`,
55
+
);
56
+
cursorUpdateInterval = setInterval(() => {
57
+
if (jetstream.cursor) {
58
+
logger.info(
59
+
`Cursor updated to: ${jetstream.cursor} (${epochUsToDateTime(jetstream.cursor)})`,
60
+
);
61
+
fs.writeFile("cursor.txt", jetstream.cursor.toString(), (err) => {
62
+
if (err) logger.error(err);
63
+
});
64
+
}
65
+
}, CURSOR_UPDATE_INTERVAL);
66
+
});
67
+
68
+
jetstream.on("close", () => {
69
+
clearInterval(cursorUpdateInterval);
70
+
logger.info("Jetstream connection closed.");
71
+
});
72
+
73
+
jetstream.on("error", (error) => {
74
+
logger.error(`Jetstream error: ${error.message}`);
75
+
});
76
+
77
+
// Check for post updates
78
+
79
+
jetstream.onCreate(
80
+
"app.bsky.feed.post",
81
+
(event: CommitCreateEvent<typeof WANTED_COLLECTION>) => {
82
+
const atURI = `at://${event.did}/app.bsky.feed.post/${event.commit.rkey}`;
83
+
const hasFacets = event.commit.record.hasOwnProperty("facets");
84
+
const hasText = event.commit.record.hasOwnProperty("text");
85
+
86
+
const tasks: Promise<void>[] = [];
87
+
88
+
// Check if the record has facets
89
+
if (hasFacets) {
90
+
const hasLinkType = event.commit.record.facets!.some((facet) =>
91
+
facet.features.some(
92
+
(feature) => feature.$type === "app.bsky.richtext.facet#link",
93
+
),
94
+
);
95
+
96
+
if (hasLinkType) {
97
+
const urls = event.commit.record
98
+
.facets!.flatMap((facet) =>
99
+
facet.features.filter(
100
+
(feature) => feature.$type === "app.bsky.richtext.facet#link",
101
+
),
102
+
)
103
+
.map((feature: LinkFeature) => feature.uri);
104
+
105
+
urls.forEach((url) => {
106
+
const posts: Post[] = [
107
+
{
108
+
did: event.did,
109
+
time: event.time_us,
110
+
rkey: event.commit.rkey,
111
+
atURI: atURI,
112
+
text: url,
113
+
cid: event.commit.cid,
114
+
},
115
+
];
116
+
tasks.push(checkPosts(posts));
117
+
});
118
+
}
119
+
} else if (hasText) {
120
+
const posts: Post[] = [
121
+
{
122
+
did: event.did,
123
+
time: event.time_us,
124
+
rkey: event.commit.rkey,
125
+
atURI: atURI,
126
+
text: event.commit.record.text,
127
+
cid: event.commit.cid,
128
+
},
129
+
];
130
+
tasks.push(checkPosts(posts));
131
+
}
132
+
},
133
+
);
134
+
135
+
// Check for profile updates
136
+
jetstream.onUpdate(
137
+
"app.bsky.actor.profile",
138
+
(event: CommitUpdateEvent<typeof WANTED_COLLECTION>) => {
139
+
try {
140
+
const ret = checkProfile(
141
+
event.did,
142
+
event.time_us,
143
+
event.commit.record.displayName,
144
+
event.commit.record.description,
145
+
);
146
+
} catch (error) {
147
+
logger.error(`Error checking profile: ${error}`);
148
+
}
149
+
},
150
+
);
151
+
152
+
// Check for handle updates
153
+
jetstream.on("identity", async (event: IdentityEvent) => {
154
+
const handle: Handle[] = [
155
+
{ did: event.did, handle: event.identity.handle, time: event.time_us },
156
+
];
157
+
158
+
try {
159
+
const ret = await checkHandle(handle);
160
+
} catch (error) {
161
+
logger.error(`Error checking handle: ${error}`);
162
+
}
163
+
});
164
+
165
+
const metricsServer = startMetricsServer(METRICS_PORT);
166
+
167
+
/* labelerServer.app.listen({ port: PORT, host: HOST }, (error, address) => {
168
+
if (error) {
169
+
logger.error("Error starting server: %s", error);
170
+
} else {
171
+
logger.info(`Labeler server listening on ${address}`);
172
+
}
173
+
});*/
174
+
175
+
jetstream.start();
176
+
177
+
function shutdown() {
178
+
try {
179
+
logger.info("Shutting down gracefully...");
180
+
fs.writeFileSync("cursor.txt", jetstream.cursor!.toString(), "utf8");
181
+
jetstream.close();
182
+
metricsServer.close();
183
+
} catch (error) {
184
+
logger.error(`Error shutting down gracefully: ${error}`);
185
+
process.exit(1);
186
+
}
187
+
}
188
+
189
+
process.on("SIGINT", shutdown);
190
+
process.on("SIGTERM", shutdown);
+28
src/metrics.ts
+28
src/metrics.ts
···
···
1
+
import express from "express";
2
+
import { Registry, collectDefaultMetrics } from "prom-client";
3
+
4
+
import logger from "./logger.js";
5
+
6
+
const register = new Registry();
7
+
collectDefaultMetrics({ register });
8
+
9
+
const app = express();
10
+
11
+
app.get("/metrics", (req, res) => {
12
+
register
13
+
.metrics()
14
+
.then((metrics) => {
15
+
res.set("Content-Type", register.contentType);
16
+
res.send(metrics);
17
+
})
18
+
.catch((ex: unknown) => {
19
+
logger.error(`Error serving metrics: ${(ex as Error).message}`);
20
+
res.status(500).end((ex as Error).message);
21
+
});
22
+
});
23
+
24
+
export const startMetricsServer = (port: number, host = "127.0.0.1") => {
25
+
return app.listen(port, host, () => {
26
+
logger.info(`Metrics server is listening on ${host}:${port}`);
27
+
});
28
+
};
+187
src/moderation.ts
+187
src/moderation.ts
···
···
1
+
import { agent, isLoggedIn } from "./agent.js";
2
+
import { MOD_DID } from "./config.js";
3
+
import { limit } from "./limits.js";
4
+
import logger from "./logger.js";
5
+
import { LISTS } from "./lists.js";
6
+
7
+
export const createPostLabel = async (
8
+
uri: string,
9
+
cid: string,
10
+
label: string,
11
+
comment: string,
12
+
) => {
13
+
await isLoggedIn;
14
+
await limit(async () => {
15
+
try {
16
+
return agent.tools.ozone.moderation.emitEvent(
17
+
{
18
+
event: {
19
+
$type: "tools.ozone.moderation.defs#modEventLabel",
20
+
comment: comment,
21
+
createLabelVals: [label],
22
+
negateLabelVals: [],
23
+
},
24
+
// specify the labeled post by strongRef
25
+
subject: {
26
+
$type: "com.atproto.repo.strongRef",
27
+
uri: uri,
28
+
cid: cid,
29
+
},
30
+
// put in the rest of the metadata
31
+
createdBy: `${agent.did}`,
32
+
createdAt: new Date().toISOString(),
33
+
},
34
+
{
35
+
encoding: "application/json",
36
+
headers: {
37
+
"atproto-proxy": `${MOD_DID!}#atproto_labeler`,
38
+
"atproto-accept-labelers":
39
+
"did:plc:ar7c4by46qjdydhdevvrndac;redact",
40
+
},
41
+
},
42
+
);
43
+
} catch (e) {
44
+
console.error(e);
45
+
}
46
+
});
47
+
};
48
+
49
+
export const createAccountLabel = async (
50
+
did: string,
51
+
label: string,
52
+
comment: string,
53
+
) => {
54
+
await isLoggedIn;
55
+
await limit(async () => {
56
+
try {
57
+
await agent.tools.ozone.moderation.emitEvent(
58
+
{
59
+
event: {
60
+
$type: "tools.ozone.moderation.defs#modEventLabel",
61
+
comment: comment,
62
+
createLabelVals: [label],
63
+
negateLabelVals: [],
64
+
},
65
+
// specify the labeled post by strongRef
66
+
subject: {
67
+
$type: "com.atproto.admin.defs#repoRef",
68
+
did: did,
69
+
},
70
+
// put in the rest of the metadata
71
+
createdBy: `${agent.did}`,
72
+
createdAt: new Date().toISOString(),
73
+
},
74
+
{
75
+
encoding: "application/json",
76
+
headers: {
77
+
"atproto-proxy": `${MOD_DID!}#atproto_labeler`,
78
+
"atproto-accept-labelers":
79
+
"did:plc:ar7c4by46qjdydhdevvrndac;redact",
80
+
},
81
+
},
82
+
);
83
+
} catch (e) {
84
+
console.error(e);
85
+
}
86
+
});
87
+
};
88
+
89
+
export const createAccountComment = async (did: string, comment: string) => {
90
+
await isLoggedIn;
91
+
await limit(async () => {
92
+
try {
93
+
await agent.tools.ozone.moderation.emitEvent(
94
+
{
95
+
event: {
96
+
$type: "tools.ozone.moderation.defs#modEventComment",
97
+
comment: comment,
98
+
},
99
+
// specify the labeled post by strongRef
100
+
subject: {
101
+
$type: "com.atproto.admin.defs#repoRef",
102
+
did: did,
103
+
},
104
+
// put in the rest of the metadata
105
+
createdBy: `${agent.did}`,
106
+
createdAt: new Date().toISOString(),
107
+
},
108
+
{
109
+
encoding: "application/json",
110
+
headers: {
111
+
"atproto-proxy": `${MOD_DID!}#atproto_labeler`,
112
+
"atproto-accept-labelers":
113
+
"did:plc:ar7c4by46qjdydhdevvrndac;redact",
114
+
},
115
+
},
116
+
);
117
+
} catch (e) {
118
+
console.error(e);
119
+
}
120
+
});
121
+
};
122
+
123
+
export const createAccountReport = async (did: string, comment: string) => {
124
+
await isLoggedIn;
125
+
await limit(async () => {
126
+
try {
127
+
await agent.tools.ozone.moderation.emitEvent(
128
+
{
129
+
event: {
130
+
$type: "tools.ozone.moderation.defs#modEventReport",
131
+
comment: comment,
132
+
reportType: "com.atproto.moderation.defs#reasonOther",
133
+
},
134
+
// specify the labeled post by strongRef
135
+
subject: {
136
+
$type: "com.atproto.admin.defs#repoRef",
137
+
did: did,
138
+
},
139
+
// put in the rest of the metadata
140
+
createdBy: `${agent.did}`,
141
+
createdAt: new Date().toISOString(),
142
+
},
143
+
{
144
+
encoding: "application/json",
145
+
headers: {
146
+
"atproto-proxy": `${MOD_DID!}#atproto_labeler`,
147
+
"atproto-accept-labelers":
148
+
"did:plc:ar7c4by46qjdydhdevvrndac;redact",
149
+
},
150
+
},
151
+
);
152
+
} catch (e) {
153
+
console.error(e);
154
+
}
155
+
});
156
+
};
157
+
158
+
export const addToList = async (label: string, did: string) => {
159
+
await isLoggedIn;
160
+
161
+
const newList = LISTS.find((list) => list.label === label);
162
+
if (!newList) {
163
+
logger.warn(
164
+
`List not found for ${label}. Likely a label not associated with a list`,
165
+
);
166
+
return;
167
+
}
168
+
logger.info(`New label added to list: ${newList.label}`);
169
+
170
+
const listUri = `at://${MOD_DID!}/app.bsky.graph.list/${newList.rkey}`;
171
+
172
+
await limit(async () => {
173
+
try {
174
+
await agent.com.atproto.repo.createRecord({
175
+
collection: "app.bsky.graph.listitem",
176
+
repo: `${MOD_DID!}`,
177
+
record: {
178
+
subject: did,
179
+
list: listUri,
180
+
createdAt: new Date().toISOString(),
181
+
},
182
+
});
183
+
} catch (e) {
184
+
console.error(e);
185
+
}
186
+
});
187
+
};
+44
src/types.ts
+44
src/types.ts
···
···
1
+
export interface Checks {
2
+
label: string;
3
+
comment: string;
4
+
description?: boolean;
5
+
displayName?: boolean;
6
+
reportOnly: boolean;
7
+
commentOnly: boolean;
8
+
check: RegExp;
9
+
whitelist?: RegExp;
10
+
ignoredDIDs?: string[];
11
+
}
12
+
13
+
export interface Post {
14
+
did: string;
15
+
time: number;
16
+
rkey: string;
17
+
atURI: string;
18
+
text: string;
19
+
cid: string;
20
+
}
21
+
22
+
export interface Handle {
23
+
did: string;
24
+
time: number;
25
+
handle: string;
26
+
}
27
+
28
+
export interface Profile {
29
+
did: string;
30
+
time: number;
31
+
displayName?: string;
32
+
description?: string;
33
+
}
34
+
35
+
// Define the type for the link feature
36
+
export interface LinkFeature {
37
+
$type: "app.bsky.richtext.facet#link";
38
+
uri: string;
39
+
}
40
+
41
+
export interface List {
42
+
label: string;
43
+
rkey: string;
44
+
}
+33
src/utils.ts
+33
src/utils.ts
···
···
1
+
/* Normalize the Unicode characters: this doesn't consistently work yet, there is something about certain bluesky strings that causes it to fail. */
2
+
export function normalizeUnicode(text: string): string {
3
+
// First decompose the characters (NFD)
4
+
const decomposed = text.normalize("NFD");
5
+
6
+
// Remove diacritics and combining marks
7
+
const withoutDiacritics = decomposed.replace(/[\u0300-\u036f]/g, "");
8
+
9
+
// Remove mathematical alphanumeric symbols
10
+
const withoutMath = withoutDiacritics.replace(
11
+
/[\uD835][\uDC00-\uDFFF]/g,
12
+
(char) => {
13
+
// Get the base character from the mathematical symbol
14
+
const code = char.codePointAt(0);
15
+
if (code >= 0x1d400 && code <= 0x1d433)
16
+
// Mathematical bold
17
+
return String.fromCharCode(code - 0x1d400 + 0x41);
18
+
if (code >= 0x1d434 && code <= 0x1d467)
19
+
// Mathematical italic
20
+
return String.fromCharCode(code - 0x1d434 + 0x61);
21
+
if (code >= 0x1d468 && code <= 0x1d49b)
22
+
// Mathematical bold italic
23
+
return String.fromCharCode(code - 0x1d468 + 0x41);
24
+
if (code >= 0x1d49c && code <= 0x1d4cf)
25
+
// Mathematical script
26
+
return String.fromCharCode(code - 0x1d49c + 0x61);
27
+
return char;
28
+
},
29
+
);
30
+
31
+
// Final NFKC normalization to handle any remaining special characters
32
+
return withoutMath.normalize("NFKC");
33
+
}