A tool for parsing traffic on the Jetstream and applying a moderation workstream driven by regexp-based rules.

Non-malicious bots were catching strays (being flagged by the facet spam rule) over duplicate hashtag facets.

Changed files
+55 -5
src
rules
facets
+11 -3
src/rules/facets/facets.ts
··· 24 24 return; 25 25 } 26 26 27 - // Group facets by their byte position (byteStart:byteEnd) 27 + // Group mention facets by their byte position (byteStart:byteEnd) 28 + // Only check mentions as duplicate tags/links are often bot bugs, not malicious 28 29 const positionMap = new Map<string, number>(); 29 30 30 31 for (const facet of facets) { 31 - const key = `${facet.index.byteStart}:${facet.index.byteEnd}`; 32 - positionMap.set(key, (positionMap.get(key) || 0) + 1); 32 + // Only count mentions for spam detection 33 + const hasMention = facet.features.some( 34 + (feature) => feature.$type === "app.bsky.richtext.facet#mention" 35 + ); 36 + 37 + if (hasMention) { 38 + const key = `${facet.index.byteStart}:${facet.index.byteEnd}`; 39 + positionMap.set(key, (positionMap.get(key) || 0) + 1); 40 + } 33 41 } 34 42 35 43 // Check if any position has more than the threshold
+44 -2
src/rules/facets/tests/facets.test.ts
··· 92 92 93 93 expect(createAccountLabel).not.toHaveBeenCalled(); 94 94 }); 95 + 96 + it("should not label when duplicate tags/links at same position (bot bugs)", async () => { 97 + const facets: Facet[] = [ 98 + { 99 + index: { byteStart: 38, byteEnd: 43 }, 100 + features: [{ $type: "app.bsky.richtext.facet#tag", tag: "news" }], 101 + }, 102 + { 103 + index: { byteStart: 38, byteEnd: 43 }, 104 + features: [{ $type: "app.bsky.richtext.facet#tag", tag: "News" }], 105 + }, 106 + ]; 107 + 108 + await checkFacetSpam(TEST_DID, TEST_TIME, TEST_URI, facets); 109 + 110 + // Should not trigger - only mentions are checked 111 + expect(createAccountLabel).not.toHaveBeenCalled(); 112 + expect(logger.info).not.toHaveBeenCalled(); 113 + }); 114 + 115 + it("should not label when duplicate links at same position", async () => { 116 + const facets: Facet[] = [ 117 + { 118 + index: { byteStart: 0, byteEnd: 10 }, 119 + features: [{ $type: "app.bsky.richtext.facet#link", uri: "https://example.com" }], 120 + }, 121 + { 122 + index: { byteStart: 0, byteEnd: 10 }, 123 + features: [{ $type: "app.bsky.richtext.facet#link", uri: "https://example.org" }], 124 + }, 125 + ]; 126 + 127 + await checkFacetSpam(TEST_DID, TEST_TIME, TEST_URI, facets); 128 + 129 + // Should not trigger - only mentions are checked 130 + expect(createAccountLabel).not.toHaveBeenCalled(); 131 + expect(logger.info).not.toHaveBeenCalled(); 132 + }); 95 133 }); 96 134 97 135 describe("when spam is detected", () => { ··· 178 216 expect(createAccountLabel).toHaveBeenCalledOnce(); 179 217 }); 180 218 181 - it("should handle different feature types at same position", async () => { 219 + it("should handle mixed feature types - only mentions at same position count", async () => { 182 220 const facets: Facet[] = [ 183 221 { 184 222 index: { byteStart: 0, byteEnd: 1 }, ··· 186 224 }, 187 225 { 188 226 index: { byteStart: 0, byteEnd: 1 }, 227 + features: [{ $type: "app.bsky.richtext.facet#mention", did: "did:plc:user2" }], 228 + 
}, 229 + { 230 + index: { byteStart: 0, byteEnd: 1 }, 189 231 features: [{ $type: "app.bsky.richtext.facet#link", uri: "https://example.com" }], 190 232 }, 191 233 ]; 192 234 193 235 await checkFacetSpam(TEST_DID, TEST_TIME, TEST_URI, facets); 194 236 195 - // Should still detect as spam regardless of feature type 237 + // Should detect spam (2 mentions at same position) 196 238 expect(createAccountLabel).toHaveBeenCalledOnce(); 197 239 }); 198 240 });