Archive of the USPol Labeler's automatic labeling script. Out-of-date. Must run alongside Ozone. DO NOT OPEN ISSUES OR PULLS -- THEY WILL BE IGNORED/CLOSED
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

feat: poll hashtags, also consider hidden tags for keywords

(the latter is mostly so that hidden tags for uspol get considered)

Each poll issues requests against the Bluesky search endpoint, so please
set your poll interval quite high to avoid hammering it.

(On my bot, the uspol hashtag will apply a label to EVERY result!)

+109 -2
+6 -1
main.ts
··· 12 12 import { pollBlueskyList } from "./src/poll_list.ts"; 13 13 import { firehoseEventToRecordView } from "./src/convert.ts"; 14 14 import { RedisClient } from "jsr:@iuioiua/r2d2"; 15 + import { pollHashtag } from "./src/poll_hashtag.ts"; 15 16 const { BskyAgent } = api; // RIP need module export 16 17 17 18 // load .env file ··· 37 38 .option("--poll-feed <feedAtUri>", "A feed to check periodically. You'll need the feed's AT URI.", {collect: true}) 38 39 .option("--poll-chronological-feed <feedAtUri>", "A feed to check periodically. You'll need the feed's AT URI. Automatically paginates.", {collect: true}) 39 40 .option("--poll-list <listAtUri>", "A list to check periodically. This may point to an AT URI of a Bluesky list, or to a file where each line is the DID or handle of the account to crawl.", {collect: true, }) 41 + .option("--hashtag <hashtag>", "A hashtag (without the hash) or hidden tag to check periodically.", {collect: true, }) 40 42 .option("--firehose", "Listen on the firehose.") 41 43 .option("--poll-reports", "Poll for reports.") 42 44 //.option("--crawl-quote-chain", "Follow the quote chain.") ··· 56 58 .env("STAR_SPANGLED_PAGE_SIZE=<limit:number>", "For chronological feeds and lists, how large each page should be.", {prefix: "STAR_SPANGLED_"}) 57 59 .parse(Deno.args) 58 60 59 - if (!options.firehose && !options.pollReports && !options.pollFeed && !options.pollChronologicalFeed && !options.pollList) { 61 + if (!options.firehose && !options.pollReports && !options.pollFeed && !options.pollChronologicalFeed && !options.pollList && !options.hashtag) { 60 62 console.error("Must specify at least one option") 61 63 cmd.showHelp() 62 64 Deno.exit(127) ··· 238 240 } 239 241 for (const listAtUri of options.pollList || []) { 240 242 await pollBlueskyList(labelerAgent, me, listAtUri) 243 + } 244 + for (const hashtag of options.hashtag || []) { 245 + await pollHashtag(labelerAgent, me, hashtag) 241 246 } 242 247 } finally { 243 248 console.debug(`-- 
Polling done. Will poll again in ${POLL_INTERVAL} seconds`)
+4 -1
src/handler.ts
··· 24 24 * points. */ 25 25 export const CRAWLED_BONUS_POINTS = 10; 26 26 27 - const publicAgent = new Agent(new CredentialSession(new URL("https://public.api.bsky.app"), fetch, undefined)); 27 + export const publicAgent = new Agent(new CredentialSession(new URL("https://public.api.bsky.app"), fetch, undefined)); 28 28 29 29 function escapeRegExp(string: string) { 30 30 return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); // $& means the whole matched string ··· 97 97 const bridgyOriginalText = (post.record as any).bridgyOriginalText; 98 98 allTextList.push(bridgyOriginalText) 99 99 // TODO: if BridgyFed ever adds a CW field, I can add it here 100 + } 101 + for (const tag in (post.record as AppBskyFeedPost.Record).tags) { 102 + allTextList.push("#"+tag); 100 103 } 101 104 const allText = allTextList.join(" "); 102 105 const splitPost = allText.toLowerCase().split(WORD_SEPARATOR_PATTERN).filter((v,_,__) => v != "")
+99
src/poll_hashtag.ts
import { Agent, AppBskyActorGetProfile } from "@atproto/api";
import { auditPostForKeywords, publicAgent } from "./handler.ts";
import { DRY_RUN, INITIAL_PAGES_LIMIT, PAGE_SIZE, PAGES_LIMIT, redis, VERBOSE } from "../main.ts";
import { retry } from "@atproto/common";
import { hasAlreadyHandled } from "./redis.ts";
import { clamp } from "./utils.ts";

/**
 * In-memory fallback for the per-hashtag "most recent post" marker, used
 * only when no Redis client is configured. Keys are hashtags, values are
 * post URIs.
 */
const hashtagMostRecent: Record<string, string> = {}

/**
 * Remember `cursor` (the URI of the newest post seen) for `hashtag`.
 *
 * Stored in the Redis hash `hashtagCursors` when `redis` is set (with up to
 * 3 retries), otherwise in the in-memory map.
 */
async function setHashtagCursor(hashtag: string, cursor: string) {
  if (!redis) {
    return hashtagMostRecent[hashtag] = cursor;
  }
  return await retry(
    async () => await redis!.sendCommand(["HSET", "hashtagCursors", hashtag, cursor]),
    {maxRetries: 3},
  );
}

/**
 * Look up the URI remembered by {@link setHashtagCursor} for `hashtag`.
 *
 * @returns the stored URI, or `undefined` when nothing has been stored (or
 * Redis replied with something other than a string).
 */
async function getHashtagCursor(hashtag: string): Promise<string|undefined> {
  if (!redis) {
    return hashtagMostRecent[hashtag];
  }
  const reply = await retry(
    async () => await redis!.sendCommand(["HGET", "hashtagCursors", hashtag]),
    {maxRetries: 3},
  );
  return typeof reply === "string" ? reply : undefined;
}

/** Store `newestUri` as the hashtag's marker, logging before/after when VERBOSE. */
async function rememberNewestPost(hashtag: string, newestUri: string, previous: string|undefined) {
  if (VERBOSE) console.log("Most recent before:", previous);
  await setHashtagCursor(hashtag, newestUri);
  if (VERBOSE) console.log("Most recent now:", await getHashtagCursor(hashtag));
}

/**
 * First-run check: has this post already been looked at?
 *
 * Asks `hasAlreadyHandled` first; when that answers `null` (presumably
 * "unknown", e.g. no Redis -- confirm against ./redis.ts) it falls back to
 * querying Ozone for the subject's status.
 */
async function wasAlreadyChecked(agent: Agent, uri: string): Promise<boolean> {
  const alreadyHandled = await hasAlreadyHandled(uri);
  if (alreadyHandled) {
    if (VERBOSE) console.log("Already handled this one:", uri);
    return true;
  }
  if (alreadyHandled !== null) return false;

  const recResponse = await agent.tools.ozone.moderation.queryStatuses({
    subject: uri,
  });
  if (!recResponse.success || recResponse.data.subjectStatuses.length == 0) return false;
  const status = recResponse.data.subjectStatuses[0];
  return Boolean(
    status.tags?.includes("auto-handled") ||
    status.reviewState != "lex:tools.ozone.moderation.defs#reviewNone"
  );
}

/**
 * Search for posts carrying `#hashtag` (newest first) and run each one
 * through `auditPostForKeywords`, page by page.
 *
 * Paging stops when the post remembered from the previous poll is reached,
 * when the search returns no further cursor, or when the page limit is hit
 * (`PAGES_LIMIT`, or `INITIAL_PAGES_LIMIT` on the very first poll of a
 * hashtag). After the first page is processed without failures, its newest
 * post becomes the remembered marker for the next poll.
 *
 * The remembered marker is read ONCE per poll. Re-reading it on every page
 * would return the value this very poll just stored, so deeper pages could
 * never recognise the previously seen post and first-run handling would be
 * lost after page one.
 *
 * Errors are logged and swallowed; this function never rejects.
 *
 * @param agent   authenticated agent used for the search and Ozone queries
 * @param me      the labeler's own profile, forwarded to `auditPostForKeywords`
 * @param hashtag hashtag to search for, without the leading `#`
 * @param cursor  search cursor to resume from (normally omitted)
 * @param depth   page index to start counting from (normally omitted)
 */
export async function pollHashtag(agent: Agent, me: AppBskyActorGetProfile.Response, hashtag: string, cursor?: string|undefined, depth: number = 0): Promise<void> {
  try {
    const mostRecent = await getHashtagCursor(hashtag);
    const limit = mostRecent ? PAGES_LIMIT : INITIAL_PAGES_LIMIT || PAGES_LIMIT
    let pageCursor: string|undefined = cursor || undefined;

    for (let pageDepth = depth; ; pageDepth++) {
      const feedEntries = await agent.app.bsky.feed.searchPosts({
        q: "#"+hashtag,
        limit: clamp(PAGE_SIZE, 1, 100),
        cursor: pageCursor,
        sort: "latest"
      }, {headers: {"atproto-proxy": ""}}); // NOTE(review): presumably blanks the proxy header so the search is not routed to the labeler service -- confirm
      if (!feedEntries.success) return;
      const posts = feedEntries.data.posts;
      if (posts.length == 0) return;

      if (INITIAL_PAGES_LIMIT === 0 && !mostRecent) {
        // can maybe be a zero-like value (undefined),
        // so we have to strict type match.
        // If INITIAL_PAGES_LIMIT is 0, don't do any initial labeling at all.
        // Still remember where we are, otherwise every later poll would
        // take this branch again and the hashtag would never be labeled.
        if (pageDepth == 0) await rememberNewestPost(hashtag, posts[0].uri, mostRecent);
        return;
      }

      let anyPostFailed = false;
      for (const post of posts) {
        if (post.uri === mostRecent) {
          if (VERBOSE) console.log("Finishing with hashtag polling early")
          if (pageDepth == 0 && !anyPostFailed) {
            await rememberNewestPost(hashtag, posts[0].uri, mostRecent);
          }
          return; // caught up with the previous poll
        }
        try {
          // Only on the first poll of a hashtag: skip posts that were
          // already dealt with through some other route.
          if (!mostRecent && await wasAlreadyChecked(agent, post.uri)) continue;

          if (VERBOSE || DRY_RUN) console.log(post.uri, `(#${hashtag})`);
          try {
            await auditPostForKeywords(post, agent, me, {})
          } catch(e) {
            console.error("Getting keywords for %s failed:", post.uri, e)
            anyPostFailed = true;
          }
        } catch(e) {
          console.error("Invalid post under hashtag:", post)
          console.error("Cause:", e)
          anyPostFailed = true;
        }
      }

      // Only the first page's newest post may become the marker, and only
      // if the whole page went through, so failures get retried next poll.
      if (pageDepth == 0 && !anyPostFailed) {
        await rememberNewestPost(hashtag, posts[0].uri, mostRecent);
      }

      if (!(pageDepth < limit && feedEntries.data.cursor)) return;
      pageCursor = feedEntries.data.cursor;
    }
  } catch(e) {
    console.error("Error when polling #%s:", hashtag, e)
  }
}