search for standard sites pub-search.waow.tech/
search zig blog atproto

feat: add script to enumerate site.standard repos for TAP

TAP only supports one signal collection, so we signal on pub.leaflet.document
to discover Leaflet users. This script enumerates repos with site.standard.publication
records (pckt, etc.) and adds them to TAP via the /repos/add endpoint.

Run manually or periodically to ensure site.standard repos are indexed.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

Changed files
+109
scripts
+109
scripts/enumerate-standard-repos
#!/usr/bin/env -S uv run --script --quiet
# /// script
# requires-python = ">=3.12"
# dependencies = ["httpx"]
# ///
"""
Enumerate repos with site.standard.* records and add them to TAP.

TAP only signals on one collection, so we use this to discover repos
that use site.standard.publication (pckt, etc) and add them to TAP.

Usage:
    ./scripts/enumerate-standard-repos
    ./scripts/enumerate-standard-repos --dry-run
    ./scripts/enumerate-standard-repos --collection <nsid>
"""

import argparse
import itertools
import sys

import httpx

RELAY_URL = "https://relay1.us-east.bsky.network"
TAP_URL = "http://leaflet-search-tap.internal:2480"  # fly internal network
COLLECTION = "site.standard.publication"

PAGE_LIMIT = 1000  # repos requested per listReposByCollection page
BATCH_SIZE = 100  # DIDs sent per /repos/add request


def enumerate_repos(relay_url: str, collection: str) -> list[str]:
    """Enumerate all repos with records in the given collection.

    Pages through ``com.atproto.sync.listReposByCollection`` on the relay,
    following the response ``cursor`` until the relay returns an empty page
    or no cursor.

    Args:
        relay_url: Base URL of the relay; a trailing slash is tolerated.
        collection: Collection NSID passed as the ``collection`` parameter.

    Returns:
        The ``did`` of every repo returned, in the order the relay listed them.

    Raises:
        httpx.HTTPError: On a transport failure or a non-2xx response.
    """
    endpoint = f"{relay_url.rstrip('/')}/xrpc/com.atproto.sync.listReposByCollection"
    dids: list[str] = []
    cursor: str | None = None

    print(f"enumerating repos with {collection}...")

    # One client for the whole crawl so successive pages reuse the connection
    # instead of reconnecting for every request.
    with httpx.Client(timeout=60) as client:
        while True:
            params: dict[str, str | int] = {
                "collection": collection,
                "limit": PAGE_LIMIT,
            }
            if cursor:
                params["cursor"] = cursor

            resp = client.get(endpoint, params=params)
            resp.raise_for_status()
            data = resp.json()

            repos = data.get("repos", [])
            if not repos:
                break
            dids.extend(repo["did"] for repo in repos)

            next_cursor = data.get("cursor")
            if not next_cursor:
                break
            if next_cursor == cursor:
                # A cursor that does not advance would make this loop spin
                # forever re-fetching the same page; stop with what we have.
                print(
                    "warning: relay returned the same cursor twice; stopping early",
                    file=sys.stderr,
                )
                break
            cursor = next_cursor

            print(f" found {len(dids)} repos so far...")

    return dids


def add_repos_to_tap(tap_url: str, dids: list[str]) -> None:
    """Add repos to TAP for syncing.

    Posts the DIDs to ``{tap_url}/repos/add`` in batches of ``BATCH_SIZE``,
    each as a JSON body of the form ``{"dids": [...]}``. Does nothing when
    ``dids`` is empty.

    Args:
        tap_url: Base URL of the TAP service; a trailing slash is tolerated.
        dids: Repo DIDs to register.

    Raises:
        httpx.HTTPError: On a transport failure or a non-2xx response. Batches
            sent before the failure are not rolled back.
    """
    if not dids:
        return

    endpoint = f"{tap_url.rstrip('/')}/repos/add"

    # Share one connection across all batches.
    with httpx.Client(timeout=30) as client:
        batches = itertools.batched(dids, BATCH_SIZE)
        for batch_number, batch in enumerate(batches, start=1):
            # batched() yields tuples; send a plain list in the JSON body.
            resp = client.post(endpoint, json={"dids": list(batch)})
            resp.raise_for_status()
            print(f" added batch {batch_number}: {len(batch)} repos")


def main() -> None:
    """Parse CLI arguments, enumerate matching repos, and add them to TAP.

    With ``--dry-run`` the first ten DIDs are printed and nothing is sent to
    TAP. HTTP failures in either phase end the script with a one-line error
    on stderr and a non-zero exit status instead of a traceback.
    """
    parser = argparse.ArgumentParser(description="Enumerate and add standard.site repos to TAP")
    parser.add_argument("--dry-run", action="store_true", help="Show what would be done")
    parser.add_argument("--relay-url", default=RELAY_URL, help="Relay URL")
    parser.add_argument("--tap-url", default=TAP_URL, help="TAP URL")
    parser.add_argument(
        "--collection",
        default=COLLECTION,
        help=f"Collection NSID to enumerate (default: {COLLECTION})",
    )
    args = parser.parse_args()

    try:
        dids = enumerate_repos(args.relay_url, args.collection)
    except httpx.HTTPError as err:
        sys.exit(f"error: failed to enumerate repos from {args.relay_url}: {err}")

    print(f"found {len(dids)} repos with {args.collection}")

    if not dids:
        print("no repos to add")
        return

    if args.dry_run:
        print("dry run - would add these repos to TAP:")
        for did in dids[:10]:
            print(f" {did}")
        if len(dids) > 10:
            print(f" ... and {len(dids) - 10} more")
        return

    print(f"adding {len(dids)} repos to TAP...")
    try:
        add_repos_to_tap(args.tap_url, dids)
    except httpx.HTTPError as err:
        sys.exit(f"error: failed to add repos to TAP at {args.tap_url}: {err}")
    print("done!")


if __name__ == "__main__":
    main()