+109
scripts/enumerate-standard-repos
+109
scripts/enumerate-standard-repos
···
1
+
#!/usr/bin/env -S uv run --script --quiet
2
+
# /// script
3
+
# requires-python = ">=3.12"
4
+
# dependencies = ["httpx"]
5
+
# ///
6
+
"""
7
+
Enumerate repos with site.standard.publication records and add them to TAP.
8
+
9
+
TAP only signals on one collection, so we use this to discover repos
10
+
that use site.standard.publication (pckt, etc) and add them to TAP.
11
+
12
+
Usage:
13
+
./scripts/enumerate-standard-repos
14
+
./scripts/enumerate-standard-repos --dry-run
15
+
"""
16
+
17
+
import argparse
18
+
import sys
19
+
20
+
import httpx
21
+
22
+
# Default relay base URL queried when enumerating repos (overridable via --relay-url).
RELAY_URL = "https://relay1.us-east.bsky.network"
23
+
# Default TAP base URL that discovered repos are added to (overridable via --tap-url).
TAP_URL = "http://leaflet-search-tap.internal:2480" # fly internal network
24
+
# Collection whose repos are enumerated on the relay and then added to TAP.
COLLECTION = "site.standard.publication"
25
+
26
+
27
+
def enumerate_repos(relay_url: str, collection: str) -> list[str]:
    """Enumerate all repos with records in the given collection.

    Pages through ``com.atproto.sync.listReposByCollection`` on the relay,
    requesting up to 1000 repos per page and following the returned cursor
    until a page comes back empty or without a cursor.

    Args:
        relay_url: Base URL of the relay to query (no trailing slash).
        collection: Collection name to look up.

    Returns:
        The ``did`` of every repo returned by the relay, in response order.

    Raises:
        httpx.HTTPStatusError: If any page request returns an error status.
        KeyError: If a returned repo entry has no ``did`` field.
    """
    endpoint = f"{relay_url}/xrpc/com.atproto.sync.listReposByCollection"
    dids: list[str] = []
    cursor = None

    print(f"enumerating repos with {collection}...")

    # A single client keeps the connection open across pages instead of
    # reconnecting to the relay for every request.
    with httpx.Client(timeout=60) as client:
        while True:
            params = {"collection": collection, "limit": 1000}
            if cursor:
                params["cursor"] = cursor

            resp = client.get(endpoint, params=params)
            resp.raise_for_status()
            data = resp.json()

            repos = data.get("repos", [])
            if not repos:
                break
            dids.extend(repo["did"] for repo in repos)

            next_cursor = data.get("cursor")
            # Stop when there is no further page, or when the relay hands back
            # the cursor we just used (which would otherwise loop forever).
            if not next_cursor or next_cursor == cursor:
                break
            cursor = next_cursor

            print(f" found {len(dids)} repos so far...")

    return dids
61
+
62
+
63
+
def add_repos_to_tap(tap_url: str, dids: list[str]) -> None:
    """Add repos to TAP for syncing.

    POSTs the DIDs to ``{tap_url}/repos/add`` in batches of 100 and prints a
    progress line after each successful batch. Does nothing when ``dids`` is
    empty.

    Args:
        tap_url: Base URL of the TAP service (no trailing slash).
        dids: Repo DIDs to add.

    Raises:
        httpx.HTTPStatusError: If a batch request returns an error status;
            batches already posted before the failure are not rolled back.
    """
    if not dids:
        return

    # batch in chunks of 100
    batch_size = 100
    endpoint = f"{tap_url}/repos/add"

    # Share one client so every batch reuses the same connection.
    with httpx.Client(timeout=30) as client:
        starts = range(0, len(dids), batch_size)
        for batch_number, start in enumerate(starts, start=1):
            batch = dids[start:start + batch_size]
            resp = client.post(endpoint, json={"dids": batch})
            resp.raise_for_status()
            print(f" added batch {batch_number}: {len(batch)} repos")
79
+
80
+
81
+
def main():
    """Parse CLI options, enumerate repos for COLLECTION, and add them to TAP.

    With ``--dry-run`` only a preview (first 10 DIDs plus a count of the rest)
    is printed and nothing is sent to TAP.
    """
    cli = argparse.ArgumentParser(description="Enumerate and add standard.site repos to TAP")
    cli.add_argument("--dry-run", action="store_true", help="Show what would be done")
    cli.add_argument("--relay-url", default=RELAY_URL, help="Relay URL")
    cli.add_argument("--tap-url", default=TAP_URL, help="TAP URL")
    options = cli.parse_args()

    dids = enumerate_repos(options.relay_url, COLLECTION)
    total = len(dids)
    print(f"found {total} repos with {COLLECTION}")

    if not dids:
        print("no repos to add")
        return

    if not options.dry_run:
        print(f"adding {total} repos to TAP...")
        add_repos_to_tap(options.tap_url, dids)
        print("done!")
        return

    # Dry run: show a short preview instead of posting anything.
    preview_limit = 10
    print("dry run - would add these repos to TAP:")
    for did in dids[:preview_limit]:
        print(f" {did}")
    if total > preview_limit:
        print(f" ... and {total - preview_limit} more")
106
+
107
+
108
+
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()