music on atproto
plyr.fm
1#!/usr/bin/env -S uv run --script --quiet
2"""backfill avatar_url for all artists from bluesky.
3
4## Context
5
6Avatar URLs were only set at artist creation and never refreshed. This caused
7stale/broken avatars throughout the app (likers tooltip, profiles, etc).
8
9PR #685 added avatar sync on login, but users who don't log in will still have
10stale avatars. This script does a one-time refresh of all avatars.
11
12## What This Script Does
13
141. Fetches all artists from the database
152. For each artist, fetches current avatar from Bluesky public API
163. Updates avatar_url in database if changed
174. Reports summary of changes
18
19## Usage
20
21```bash
22# dry run (show what would be updated)
23uv run scripts/backfill_avatars.py --dry-run
24
25# actually update the database
26uv run scripts/backfill_avatars.py
27
28# target specific environment
29DATABASE_URL=postgresql://... uv run scripts/backfill_avatars.py
30```
31"""
32
33import asyncio
34import logging
35import sys
36
37# scripts are run from backend/ directory via: uv run python ../scripts/backfill_avatars.py
38
39from sqlalchemy import select
40
41from backend._internal.atproto.profile import fetch_user_avatar
42from backend.models import Artist
43from backend.utilities.database import db_session
44
# emit timestamped, level-tagged lines; INFO and above are shown
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# throttling knobs so we don't hammer the bluesky API
# CONCURRENCY_LIMIT sizes the semaphore that guards avatar fetches.
# NOTE(review): DELAY_BETWEEN_BATCHES does not appear to be referenced
# anywhere in the visible script - confirm before relying on it.
CONCURRENCY_LIMIT = 5
DELAY_BETWEEN_BATCHES = 0.5  # seconds
54
55
async def backfill_avatars(dry_run: bool = False) -> None:
    """backfill avatar_url for all artists from bluesky.

    loads every Artist row, fetches the current avatar for each artist's DID
    via fetch_user_avatar, and compares it with the stored avatar_url:

    - fetch raised        -> counted as failed, row left untouched
    - same value          -> counted as unchanged
    - stored url, now None -> counted as cleared
    - anything else       -> counted as updated

    Args:
        dry_run: when True, only log what would change; no attribute is
            assigned and nothing is committed.

    all changes are written with a single commit at the end, and a summary
    line with the four counters is logged.
    """
    async with db_session() as db:
        result = await db.execute(select(Artist))
        artists = result.scalars().all()

        if not artists:
            logger.info("no artists found")
            return

        logger.info(f"found {len(artists)} artists to check")

        if dry_run:
            logger.info("dry run mode - checking avatars without updating:")

        updated = 0
        unchanged = 0
        failed = 0
        cleared = 0

        # cap the number of in-flight fetches; there is no batching or
        # inter-request delay here, the semaphore is the only throttle
        semaphore = asyncio.Semaphore(CONCURRENCY_LIMIT)

        async def process_artist(
            artist: Artist,
        ) -> tuple[str | None, Exception | None]:
            """fetch avatar for artist, return (new_url, error)."""
            async with semaphore:
                try:
                    return (await fetch_user_avatar(artist.did), None)
                except Exception as e:
                    # best-effort: one failing profile must not abort the
                    # whole run, so the error is carried back to the caller
                    return (None, e)

        # fetch all avatars concurrently (with semaphore limiting)
        logger.info("fetching avatars from bluesky...")
        results = await asyncio.gather(*(process_artist(a) for a in artists))

        # gather() returns results in the order of its inputs, so each result
        # pairs positionally with its artist - no per-result search by DID
        for artist, (new_url, error) in zip(artists, results, strict=True):
            old_url = artist.avatar_url

            if error is not None:
                failed += 1
                logger.warning(f"failed to fetch avatar for {artist.handle}: {error}")
                continue

            if old_url == new_url:
                unchanged += 1
                continue

            if new_url is None and old_url is not None:
                cleared += 1
                action = "would clear" if dry_run else "clearing"
                logger.info(
                    f"{action} avatar for {artist.handle} (was: {old_url[:50]}...)"
                )
            elif new_url is not None and old_url is None:
                updated += 1
                action = "would set" if dry_run else "setting"
                logger.info(f"{action} avatar for {artist.handle}")
            else:
                updated += 1
                action = "would update" if dry_run else "updating"
                logger.info(f"{action} avatar for {artist.handle}")

            if not dry_run:
                artist.avatar_url = new_url

        # one commit for the whole run; skipped entirely in dry-run mode
        if not dry_run:
            await db.commit()

        logger.info(
            f"backfill complete: {updated} updated, {cleared} cleared, "
            f"{unchanged} unchanged, {failed} failed"
        )
133
134
async def main() -> None:
    """script entry: look for --dry-run on the command line, then run the backfill."""
    is_dry_run = any(arg == "--dry-run" for arg in sys.argv)

    if is_dry_run:
        logger.info("running in DRY RUN mode - no changes will be made")

    await backfill_avatars(dry_run=is_dry_run)
143
144
# only drive the backfill when executed as a script, not when imported
if __name__ == "__main__":
    backfill = main()
    asyncio.run(backfill)