declarative relay deployment on hetzner relay.waow.tech
atproto
at main 154 lines 4.7 kB view raw
1#!/usr/bin/env -S PYTHONUNBUFFERED=1 uv run --script --quiet 2# /// script 3# requires-python = ">=3.12" 4# dependencies = [] 5# /// 6""" 7compare listReposByCollection results between two relay endpoints. 8 9fetches all DIDs for a collection from both endpoints and reports the 10difference — which DIDs are unique to each side. 11 12usage: 13 ./scripts/collectiondir-diff --collection io.atcr.sailor.profile 14 ./scripts/collectiondir-diff --collection app.bsky.feed.post --limit 10000 15 ./scripts/collectiondir-diff --collection io.atcr.sailor.profile \ 16 --a https://relay.waow.tech --b https://relay1.us-east.bsky.network 17""" 18 19import argparse 20import json 21import sys 22import urllib.request 23import urllib.error 24 25 26def fetch_dids(base_url: str, collection: str, limit: int) -> set[str]: 27 """paginate listReposByCollection and return all DIDs up to limit.""" 28 endpoint = f"{base_url.rstrip('/')}/xrpc/com.atproto.sync.listReposByCollection" 29 dids: set[str] = set() 30 cursor = None 31 page_size = min(limit, 2000) 32 33 while len(dids) < limit: 34 params = f"collection={collection}&limit={page_size}" 35 if cursor: 36 params += f"&cursor={urllib.request.quote(cursor)}" 37 38 url = f"{endpoint}?{params}" 39 try: 40 req = urllib.request.Request(url) 41 with urllib.request.urlopen(req, timeout=30) as resp: 42 data = json.loads(resp.read()) 43 except urllib.error.HTTPError as e: 44 print(f" error fetching {base_url}: {e.code} {e.reason}", file=sys.stderr) 45 break 46 except Exception as e: 47 print(f" error fetching {base_url}: {e}", file=sys.stderr) 48 break 49 50 repos = data.get("repos", []) 51 if not repos: 52 break 53 54 for repo in repos: 55 dids.add(repo["did"]) 56 57 cursor = data.get("cursor") 58 if not cursor: 59 break 60 61 sys.stdout.write(f"\r {base_url}: {len(dids)} DIDs fetched...") 62 sys.stdout.flush() 63 64 return dids 65 66 67def resolve_did(did: str) -> dict: 68 """resolve a DID via plc.directory. returns handle + PDS host.""" 69 try: 70 url = f"https://plc.directory/{did}" 71 with urllib.request.urlopen(url, timeout=10) as resp: 72 data = json.loads(resp.read()) 73 handle = "?" 74 aka = data.get("alsoKnownAs", []) 75 if aka: 76 handle = aka[0].replace("at://", "") 77 pds = "?" 78 services = data.get("service", []) 79 if services: 80 pds = services[0].get("serviceEndpoint", "?") 81 return {"handle": handle, "pds": pds} 82 except Exception: 83 return {"handle": "?", "pds": "?"} 84 85 86def main(): 87 parser = argparse.ArgumentParser( 88 description="compare listReposByCollection between two relay endpoints" 89 ) 90 parser.add_argument("--collection", required=True, help="collection NSID to compare") 91 parser.add_argument( 92 "--a", 93 default="https://relay.waow.tech", 94 help="first endpoint (default: https://relay.waow.tech)", 95 ) 96 parser.add_argument( 97 "--b", 98 default="https://bsky.network", 99 help="second endpoint (default: https://bsky.network)", 100 ) 101 parser.add_argument( 102 "--limit", 103 type=int, 104 default=100_000, 105 help="max DIDs to fetch per endpoint (default: 100000)", 106 ) 107 parser.add_argument( 108 "--resolve", 109 action="store_true", 110 help="resolve DIDs to handles via plc.directory (slower)", 111 ) 112 args = parser.parse_args() 113 114 print(f"collection: {args.collection}") 115 print(f" A: {args.a}") 116 print(f" B: {args.b}") 117 print(f" limit: {args.limit:,}") 118 print() 119 120 dids_a = fetch_dids(args.a, args.collection, args.limit) 121 print(f"\r A: {len(dids_a):,} DIDs" + " " * 40) 122 123 dids_b = fetch_dids(args.b, args.collection, args.limit) 124 print(f"\r B: {len(dids_b):,} DIDs" + " " * 40) 125 print() 126 127 only_a = dids_a - dids_b 128 only_b = dids_b - dids_a 129 common = dids_a & dids_b 130 131 print(f"common: {len(common):,}") 132 print(f"only in A ({args.a}): {len(only_a):,}") 133 print(f"only in B ({args.b}): {len(only_b):,}") 134 135 def print_dids(label: str, dids: set[str]): 136 if not dids: 137 return 138 print(f"\n{label}:") 139 for did in sorted(dids): 140 if args.resolve: 141 info = resolve_did(did) 142 print(f" {did} @{info['handle']} ({info['pds']})") 143 else: 144 print(f" {did}") 145 146 print_dids(f"only in A", only_a) 147 print_dids(f"only in B", only_b) 148 149 if not only_a and not only_b: 150 print("\nidentical.") 151 152 153if __name__ == "__main__": 154 main()