A tool for tailing the firehose and matching images against known perceptual hashes, and then labeling them.

Add user-configurable Hamming threshold

- Add PHASH_HAMMING_THRESHOLD environment variable to config with default of 3
- Update image processor to use config value instead of hardcoded 5
- Lower default hammingThreshold in rules from 5 to 3 for stricter matching
- Add Hamming threshold guide to README for reference

This lets users set the global Hamming distance threshold via the
PHASH_HAMMING_THRESHOLD environment variable while still allowing per-rule
overrides. The lower default of 3 reduces false positives by rejecting
images that differ too much from the known hash.

Skywatch 95ebbfaa 905b9820

Changed files
+17 -4
rules
src
config
processor
+7
README.md
··· 75 75 ]; 76 76 ``` 77 77 78 + Hamming threshold guide: 79 + - 0 = Exact match only (very strict) 80 + - 1-2 = Nearly identical images (minor compression artifacts) 81 + - 3-4 = Very similar images (slight edits, crops) 82 + - 5-8 = Similar images (moderate edits) 83 + - 9+ = Loosely similar images (too permissive) 84 + 78 85 To generate a phash from an image: 79 86 80 87 ```bash
+3 -3
rules/blobs.ts
··· 2 2 3 3 export const BLOB_CHECKS: BlobCheck[] = [ 4 4 { 5 - phashes: ["e0e0e0e0e0fcfefe"], 5 + phashes: ["e0e0e0e0e0fcfefe", "9b9e00008f8fffff", "f090889c9ede9ee6"], 6 6 label: "troll", 7 7 comment: "Image is used in harrassment campaign targeting Will Stancil", 8 8 reportAcct: false, 9 9 labelAcct: true, 10 10 reportPost: true, 11 11 toLabel: true, 12 - hammingThreshold: 5, 12 + hammingThreshold: 3, 13 13 description: "Sample harassment image variants (placeholder hashes)", 14 14 ignoreDID: ["did:plc:7umvpuxe2vbrc3zrzuquzniu"], 15 15 }, ··· 21 21 labelAcct: false, 22 22 reportPost: false, 23 23 toLabel: true, 24 - hammingThreshold: 5, 24 + hammingThreshold: 3, 25 25 description: "Sample harassment image variants (placeholder hashes)", 26 26 }, 27 27 ];
+6
src/config/index.ts
··· 34 34 modDid: string; 35 35 rateLimit: number; 36 36 }; 37 + phash: { 38 + defaultHammingThreshold: number; 39 + }; 37 40 } 38 41 39 42 function getEnv(key: string, defaultValue?: string): string { ··· 98 101 moderation: { 99 102 modDid: getEnv("MOD_DID"), 100 103 rateLimit: getEnvNumber("RATE_LIMIT_MS", 100), 104 + }, 105 + phash: { 106 + defaultHammingThreshold: getEnvNumber("PHASH_HAMMING_THRESHOLD", 3), 101 107 }, 102 108 };
+1 -1
src/processor/image-processor.ts
··· 224 224 n &= n - 1n; 225 225 } 226 226 227 - const threshold = matchedCheck.hammingThreshold ?? 5; 227 + const threshold = matchedCheck.hammingThreshold ?? config.phash.defaultHammingThreshold; 228 228 if (distance <= threshold) { 229 229 return { 230 230 phash,