commit 789b1b7bc46cd8e96f33dce9040cc3ad0ca5acc0 · hailey.at/bloblens

+29

Dockerfile

# Build stage: compiles the bloblens binary with CGO disabled so it does not
# depend on the builder image's C library.
FROM golang:1.25.5-alpine AS builder

WORKDIR /build

# Install build dependencies
RUN apk add --no-cache git ca-certificates

# Copy the module files on their own first so the dependency download layer
# stays cached until go.mod/go.sum change.
COPY go.mod go.sum ./
RUN go mod download

# Copy source code
COPY . .

# Build the application.
# The former `-a -installsuffix cgo` flags were dropped: the build stage always
# starts from a clean image, and CGO_ENABLED=0 alone is enough to produce a
# pure-Go binary with current toolchains.
RUN CGO_ENABLED=0 GOOS=linux go build -o bloblens ./cmd/bloblens

# Runtime stage: pinned to a release tag (instead of `latest`) so image
# rebuilds are reproducible.
FROM alpine:3.22

# ca-certificates is needed for outbound TLS (e.g. the wss:// websocket host).
# A dedicated unprivileged user is created so the service does not run as root.
RUN apk add --no-cache ca-certificates \
    && addgroup -S bloblens \
    && adduser -S -G bloblens bloblens

WORKDIR /app

# Copy the binary from builder
COPY --from=builder /build/bloblens .

USER bloblens

ENTRYPOINT ["/app/bloblens"]

+67

README.md

# bloblens

Tracks similar avatars and banners across Bluesky by hashing profile images and comparing them.

## What it does

Listens to the ATProto firehose for profile updates, hashes any avatar/banner images using PDQ perceptual hashing, and stores them in a vector database. When it finds multiple profiles using similar images, it logs them.

Useful for finding coordinated behavior, tracking ban evasion, or whatever else.

## Setup

You'll need three things running:
- **bloblens** (this)
- **Retina** - image hashing service (handles PDQ hashing)
- **Qdrant** - vector database (stores and searches hashes)

### With Docker Compose

First set the image for the `retina` service in `docker-compose.yml` to your Retina image, then run:

```bash
docker-compose up -d
```

That's it. It'll create the Qdrant collection automatically on first run.

### Manual

Build:
```bash
go build -o bloblens ./cmd/bloblens
```

Run:
```bash
./bloblens \
  --retina-host=http://localhost:8080 \
  --websocket-host=wss://jetstream.atproto.tools/subscribe \
  --qdrant-host=localhost \
  --qdrant-port=6334 \
  --qdrant-collection=blobs \
  --max-hamming-distance=31 \
  --seen-threshold=5 \
  --max-limit=100 \
  --max-search-time=5s
```

## Configuration

| Flag | Description | Default |
|------|-------------|---------|
| `--retina-host` | URL of your Retina instance | required |
| `--websocket-host` | ATProto firehose URL | required |
| `--qdrant-host` | Qdrant server hostname | required |
| `--qdrant-port` | Qdrant gRPC port | required |
| `--qdrant-collection` | Collection name for storing hashes | required |
| `--max-hamming-distance` | Max distance to consider images "similar" | 31 |
| `--seen-threshold` | How many matches before logging | required |
| `--max-limit` | Max results to fetch from Qdrant per search | required |
| `--max-search-time` | Timeout for vector searches | required |


## How it works

1. Watches the firehose for `app.bsky.actor.profile` records
2. Extracts avatar and banner blob refs
3. Sends them to Retina for PDQ hashing
4. Stores the 256-dimensional vector in Qdrant
5. Searches Qdrant for similar existing hashes
6. Logs when threshold is exceeded

+49

docker-compose.yml

# Local stack for bloblens: Qdrant (vector store), Retina (image hashing)
# and bloblens itself.
services:
  qdrant:
    image: qdrant/qdrant:latest
    ports:
      - "6333:6333"
      - "6334:6334" # gRPC port, the one bloblens connects to
    volumes:
      - qdrant_storage:/qdrant/storage
    environment:
      # Qdrant takes configuration overrides as QDRANT__<SECTION>__<KEY>;
      # the previous QDRANT_HOST name is not a setting Qdrant reads.
      - QDRANT__SERVICE__HOST=0.0.0.0
    restart: unless-stopped

  retina:
    # Set RETINA_IMAGE (shell environment or .env file) to your Retina image,
    # e.g. a GHCR link. Compose aborts with the message below when it is
    # missing, instead of failing on an empty `image:` key.
    image: "${RETINA_IMAGE:?set RETINA_IMAGE to your Retina image reference}"
    ports:
      - "8080:8080"
    restart: unless-stopped

  bloblens:
    build:
      context: .
      dockerfile: Dockerfile
    depends_on:
      - qdrant
      - retina
    environment:
      - RETINA_HOST=http://retina:8080
      - WEBSOCKET_HOST=wss://jetstream.atproto.tools/subscribe
      - QDRANT_HOST=qdrant
      - QDRANT_PORT=6334
      - QDRANT_COLLECTION=blobs
      - MAX_SEARCH_TIME=5s
      - MAX_LIMIT=100
      - SEEN_THRESHOLD=5
      - MAX_HAMMING_DISTANCE=31
    # ${VAR} below is substituted by Compose when it parses this file, using
    # the shell environment / .env file -- NOT the `environment:` block above,
    # which only exists inside the container. Without the `:-default`
    # fallbacks every flag would be passed an empty value on a plain
    # `docker-compose up`. Each default mirrors the value set above; export
    # the variable or put it in .env to override.
    command:
      - --retina-host=${RETINA_HOST:-http://retina:8080}
      - --websocket-host=${WEBSOCKET_HOST:-wss://jetstream.atproto.tools/subscribe}
      - --qdrant-host=${QDRANT_HOST:-qdrant}
      - --qdrant-port=${QDRANT_PORT:-6334}
      - --qdrant-collection=${QDRANT_COLLECTION:-blobs}
      - --max-search-time=${MAX_SEARCH_TIME:-5s}
      - --max-limit=${MAX_LIMIT:-100}
      - --seen-threshold=${SEEN_THRESHOLD:-5}
      - --max-hamming-distance=${MAX_HAMMING_DISTANCE:-31}
    restart: unless-stopped

volumes:
  qdrant_storage: