declarative relay deployment on hetzner
relay.waow.tech
atproto
1#!/usr/bin/env -S PYTHONUNBUFFERED=1 uv run --script --quiet
2# /// script
3# requires-python = ">=3.12"
4# dependencies = []
5# ///
6"""
7compare listReposByCollection results between two relay endpoints.
8
9fetches all DIDs for a collection from both endpoints and reports the
10difference — which DIDs are unique to each side.
11
12usage:
13 ./scripts/collectiondir-diff --collection io.atcr.sailor.profile
14 ./scripts/collectiondir-diff --collection app.bsky.feed.post --limit 10000
15 ./scripts/collectiondir-diff --collection io.atcr.sailor.profile \
16 --a https://relay.waow.tech --b https://relay1.us-east.bsky.network
17"""
18
19import argparse
20import json
21import sys
22import urllib.request
23import urllib.error
24
25
def fetch_dids(base_url: str, collection: str, limit: int) -> set[str]:
    """paginate listReposByCollection and return at most `limit` DIDs.

    args:
        base_url: relay origin (a trailing slash is tolerated).
        collection: collection NSID, sent as the `collection` query parameter.
        limit: maximum number of distinct DIDs to collect. a value <= 0
            returns an empty set without issuing a request.

    returns:
        the distinct DIDs collected. on an HTTP, network or JSON error the
        problem is reported on stderr and whatever was gathered so far is
        returned, so a short result may mean "failed" rather than "complete".
    """
    # function-scope import: keeps this block independent of the file's import list
    from urllib.parse import urlencode

    endpoint = f"{base_url.rstrip('/')}/xrpc/com.atproto.sync.listReposByCollection"
    max_page_size = 2000  # largest page this script ever asks for
    dids: set[str] = set()
    cursor = None

    while len(dids) < limit:
        # ask only for what is still missing; requesting a full page every
        # time lets the final page push the result past `limit`
        query = {
            "collection": collection,
            "limit": min(limit - len(dids), max_page_size),
        }
        if cursor:
            query["cursor"] = cursor

        # urlencode escapes every value, including the collection name
        url = f"{endpoint}?{urlencode(query)}"
        try:
            req = urllib.request.Request(url)
            with urllib.request.urlopen(req, timeout=30) as resp:
                data = json.loads(resp.read())
        except urllib.error.HTTPError as e:
            print(f" error fetching {base_url}: {e.code} {e.reason}", file=sys.stderr)
            break
        except Exception as e:
            # best effort: URL errors, timeouts and bad JSON all end pagination
            print(f" error fetching {base_url}: {e}", file=sys.stderr)
            break

        repos = data.get("repos", [])
        if not repos:
            break

        for repo in repos:
            # a server may ignore the requested page size; never exceed `limit`
            if len(dids) >= limit:
                break
            dids.add(repo["did"])

        next_cursor = data.get("cursor")
        # a missing cursor means the listing is exhausted; a repeated one
        # means the server is not advancing and would make us loop forever
        if not next_cursor or next_cursor == cursor:
            break
        cursor = next_cursor

        sys.stdout.write(f"\r {base_url}: {len(dids)} DIDs fetched...")
        sys.stdout.flush()

    return dids
65
66
def _summarize_did_document(doc: object) -> dict:
    """extract the handle and PDS endpoint from an already-parsed DID document."""
    summary = {"handle": "?", "pds": "?"}
    if not isinstance(doc, dict):
        return summary

    aliases = doc.get("alsoKnownAs")
    if isinstance(aliases, list):
        names = [alias for alias in aliases if isinstance(alias, str)]
        at_uris = [alias for alias in names if alias.startswith("at://")]
        if at_uris:
            # the handle is the first at:// alias, not simply the first alias
            summary["handle"] = at_uris[0].removeprefix("at://")
        elif names:
            # no at:// alias present: show the first alias unchanged
            summary["handle"] = names[0]

    services = doc.get("service")
    if isinstance(services, list):
        entries = [entry for entry in services if isinstance(entry, dict)]
        # prefer the entry that is actually the PDS; a document may list other
        # services (e.g. a labeler) ahead of it
        pds_entries = [
            entry for entry in entries
            if str(entry.get("id", "")).endswith("#atproto_pds")
        ]
        candidates = pds_entries or entries
        if candidates:
            summary["pds"] = candidates[0].get("serviceEndpoint", "?")

    return summary


def resolve_did(did: str) -> dict:
    """resolve a DID via plc.directory. returns handle + PDS host.

    args:
        did: the DID to look up. only `did:plc:` identifiers are sent to
            plc.directory; anything else is reported as unknown.

    returns:
        a dict with keys "handle" and "pds". either value is "?" when it
        cannot be determined. lookup failures never raise.
    """
    unknown = {"handle": "?", "pds": "?"}
    # skip the request for identifiers that are not did:plc
    if not isinstance(did, str) or not did.startswith("did:plc:"):
        return unknown

    try:
        url = f"https://plc.directory/{did}"
        with urllib.request.urlopen(url, timeout=10) as resp:
            doc = json.loads(resp.read())
    except Exception:
        # deliberate best effort: a failed lookup must not abort the diff
        return unknown

    return _summarize_did_document(doc)
84
85
def main():
    """entry point: parse flags, fetch both DID sets, and print how they differ."""
    cli = argparse.ArgumentParser(
        description="compare listReposByCollection between two relay endpoints"
    )
    cli.add_argument("--collection", required=True, help="collection NSID to compare")
    cli.add_argument(
        "--a",
        default="https://relay.waow.tech",
        help="first endpoint (default: https://relay.waow.tech)",
    )
    cli.add_argument(
        "--b",
        default="https://bsky.network",
        help="second endpoint (default: https://bsky.network)",
    )
    cli.add_argument(
        "--limit",
        type=int,
        default=100_000,
        help="max DIDs to fetch per endpoint (default: 100000)",
    )
    cli.add_argument(
        "--resolve",
        action="store_true",
        help="resolve DIDs to handles via plc.directory (slower)",
    )
    opts = cli.parse_args()

    # run summary; the trailing empty entry yields the blank separator line
    header = [
        f"collection: {opts.collection}",
        f" A: {opts.a}",
        f" B: {opts.b}",
        f" limit: {opts.limit:,}",
        "",
    ]
    print("\n".join(header))

    # the leading \r and trailing padding overwrite fetch_dids' progress line
    from_a = fetch_dids(opts.a, opts.collection, opts.limit)
    print(f"\r A: {len(from_a):,} DIDs" + " " * 40)

    from_b = fetch_dids(opts.b, opts.collection, opts.limit)
    print(f"\r B: {len(from_b):,} DIDs" + " " * 40)
    print()

    shared = from_a.intersection(from_b)
    a_exclusive = from_a.difference(from_b)
    b_exclusive = from_b.difference(from_a)

    print(f"common: {len(shared):,}")
    print(f"only in A ({opts.a}): {len(a_exclusive):,}")
    print(f"only in B ({opts.b}): {len(b_exclusive):,}")

    # list each non-empty one-sided group in sorted order
    for heading, group in (("only in A", a_exclusive), ("only in B", b_exclusive)):
        if not group:
            continue
        print(f"\n{heading}:")
        for did in sorted(group):
            if not opts.resolve:
                print(f" {did}")
                continue
            info = resolve_did(did)
            print(f" {did} @{info['handle']} ({info['pds']})")

    if not (a_exclusive or b_exclusive):
        print("\nidentical.")
151
152
# run the CLI only when executed as a script, not when imported as a module
if __name__ == "__main__":
    main()