+29
Dockerfile
+29
Dockerfile
···
1
+
# Build stage
2
+
FROM golang:1.25.5-alpine AS builder
3
+
4
+
WORKDIR /build
5
+
6
+
# Install build dependencies
7
+
RUN apk add --no-cache git ca-certificates
8
+
9
+
# Copy go mod files
10
+
COPY go.mod go.sum ./
11
+
RUN go mod download
12
+
13
+
# Copy source code
14
+
COPY . .
15
+
16
+
# Build the application
17
+
# CGO_ENABLED=0 yields a static binary that runs on the minimal runtime image.
# -trimpath drops local build paths; -s -w strips debug symbols to shrink the binary.
RUN CGO_ENABLED=0 GOOS=linux go build -trimpath -ldflags="-s -w" -o bloblens ./cmd/bloblens
18
+
19
+
# Runtime stage
20
+
# Pinned (not :latest) so rebuilds are reproducible; bump deliberately.
FROM alpine:3.22
21
+
22
+
RUN apk --no-cache add ca-certificates
23
+
24
+
WORKDIR /app
25
+
26
+
# Copy the binary from builder
27
+
COPY --from=builder /build/bloblens .
28
+
29
+
# Run as an unprivileged user; the binary is world-executable, so root is not needed.
USER nobody:nobody
ENTRYPOINT ["/app/bloblens"]
+67
README.md
+67
README.md
···
1
+
# bloblens
2
+
3
+
Tracks similar avatars and banners across Bluesky by hashing profile images and comparing them.
4
+
5
+
## What it does
6
+
7
+
Listens to the ATProto firehose for profile updates, hashes any avatar/banner images using PDQ perceptual hashing, and stores them in a vector database. When it finds multiple profiles using similar images, it logs them.
8
+
9
+
Useful for finding coordinated behavior, tracking ban evasion, or anything else where reused profile images are a signal.
10
+
11
+
## Setup
12
+
13
+
You'll need three things running:
14
+
- **bloblens** (this)
15
+
- **Retina** - image hashing service (handles PDQ hashing)
16
+
- **Qdrant** - vector database (stores and searches hashes)
17
+
18
+
### With Docker Compose
19
+
20
+
```bash
21
+
docker-compose up -d
22
+
```
23
+
24
+
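Before the first run, set the `retina` service's `image:` in `docker-compose.yml` to your Retina image (it ships as a placeholder). After that, that's it. It'll create the Qdrant collection automatically on first run.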
25
+
26
+
### Manual
27
+
28
+
Build:
29
+
```bash
30
+
go build -o bloblens ./cmd/bloblens
31
+
```
32
+
33
+
Run:
34
+
```bash
35
+
./bloblens \
36
+
--retina-host=http://localhost:8080 \
37
+
--websocket-host=wss://jetstream.atproto.tools/subscribe \
38
+
--qdrant-host=localhost \
39
+
--qdrant-port=6334 \
40
+
--qdrant-collection=blobs \
41
+
--max-hamming-distance=31 \
42
+
--seen-threshold=5 \
--max-limit=100 \
--max-search-time=5s
43
+
```
44
+
45
+
## Configuration
46
+
47
+
| Flag | Description | Default |
48
+
|------|-------------|---------|
49
+
| `--retina-host` | URL of your Retina instance | required |
50
+
| `--websocket-host` | ATProto firehose WebSocket URL (e.g. a Jetstream endpoint) | required |
51
+
| `--qdrant-host` | Qdrant server hostname | required |
52
+
| `--qdrant-port` | Qdrant gRPC port | required |
53
+
| `--qdrant-collection` | Collection name for storing hashes | required |
54
+
| `--max-hamming-distance` | Max distance to consider images "similar" | 31 |
55
+
| `--seen-threshold` | How many matches before logging | required |
56
+
| `--max-limit` | Max results to fetch from Qdrant per search | required |
57
+
| `--max-search-time` | Timeout for vector searches | required |
58
+
59
+
60
+
## How it works
61
+
62
+
1. Watches the firehose for `app.bsky.actor.profile` records
63
+
2. Extracts avatar and banner blob refs
64
+
3. Sends them to Retina for PDQ hashing
65
+
4. Stores the 256-dimensional vector in Qdrant
66
+
5. Searches Qdrant for similar existing hashes
67
+
6. Logs when threshold is exceeded
+49
docker-compose.yml
+49
docker-compose.yml
···
1
+
services:
2
+
qdrant:
3
+
image: qdrant/qdrant:latest
4
+
ports:
5
+
- "6333:6333"
6
+
- "6334:6334"
7
+
volumes:
8
+
- qdrant_storage:/qdrant/storage
9
+
environment:
10
+
# Qdrant reads config overrides as QDRANT__<SECTION>__<KEY>; plain QDRANT_HOST is ignored.
- QDRANT__SERVICE__HOST=0.0.0.0
11
+
restart: unless-stopped
12
+
13
+
retina:
14
+
image: # INSERT YOUR GHCR LINK HERE
15
+
ports:
16
+
- "8080:8080"
17
+
restart: unless-stopped
18
+
19
+
bloblens:
20
+
build:
21
+
context: .
22
+
dockerfile: Dockerfile
23
+
depends_on:
24
+
- qdrant
25
+
- retina
26
+
environment:
27
+
- RETINA_HOST=http://retina:8080
28
+
- WEBSOCKET_HOST=wss://jetstream.atproto.tools/subscribe
29
+
- QDRANT_HOST=qdrant
30
+
- QDRANT_PORT=6334
31
+
- QDRANT_COLLECTION=blobs
32
+
- MAX_SEARCH_TIME=5s
33
+
- MAX_LIMIT=100
34
+
- SEEN_THRESHOLD=5
35
+
- MAX_HAMMING_DISTANCE=31
36
+
command:
37
+
# Compose substitutes ${VAR} from the host shell / .env file, NOT from this
# service's `environment:` list, so each flag carries an explicit default.
- "--retina-host=${RETINA_HOST:-http://retina:8080}"
38
+
- "--websocket-host=${WEBSOCKET_HOST:-wss://jetstream.atproto.tools/subscribe}"
39
+
- "--qdrant-host=${QDRANT_HOST:-qdrant}"
40
+
- "--qdrant-port=${QDRANT_PORT:-6334}"
41
+
- "--qdrant-collection=${QDRANT_COLLECTION:-blobs}"
42
+
- "--max-search-time=${MAX_SEARCH_TIME:-5s}"
43
+
- "--max-limit=${MAX_LIMIT:-100}"
44
+
- "--seen-threshold=${SEEN_THRESHOLD:-5}"
45
+
- "--max-hamming-distance=${MAX_HAMMING_DISTANCE:-31}"
46
+
restart: unless-stopped
47
+
48
+
volumes:
49
+
qdrant_storage: