#!/usr/bin/env -S uv run --script --quiet
"""backfill avatar_url for all artists from bluesky.

## Context

Avatar URLs were only set at artist creation and never refreshed. This caused
stale/broken avatars throughout the app (likers tooltip, profiles, etc).

PR #685 added avatar sync on login, but users who don't log in will still have
stale avatars. This script does a one-time refresh of all avatars.

## What This Script Does

1. Fetches all artists from the database
2. For each artist, fetches current avatar from Bluesky public API
3. Updates avatar_url in database if changed
4. Reports summary of changes

## Usage

```bash
# dry run (show what would be updated)
uv run scripts/backfill_avatars.py --dry-run

# actually update the database
uv run scripts/backfill_avatars.py

# target specific environment
DATABASE_URL=postgresql://... uv run scripts/backfill_avatars.py
```
"""

import asyncio
import logging
import sys

# scripts are run from backend/ directory via: uv run python ../scripts/backfill_avatars.py

from sqlalchemy import select

from backend._internal.atproto.profile import fetch_user_avatar
from backend.models import Artist
from backend.utilities.database import db_session

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)

# cap on simultaneous requests to avoid hammering bluesky API
CONCURRENCY_LIMIT = 5
# NOTE(review): not referenced anywhere in this script - requests are throttled
# only by the semaphore below, no delay is inserted between them.
DELAY_BETWEEN_BATCHES = 0.5  # seconds


async def backfill_avatars(dry_run: bool = False) -> None:
    """backfill avatar_url for all artists from bluesky.

    loads every artist, fetches each one's current avatar (at most
    CONCURRENCY_LIMIT fetches in flight at a time), and stores the fresh
    value whenever it differs from the stored one. a summary of
    updated / cleared / unchanged / failed counts is logged at the end.

    Args:
        dry_run: when True, only log what would change; no attribute is
            assigned and nothing is committed.
    """

    async with db_session() as db:
        result = await db.execute(select(Artist))
        artists = result.scalars().all()

        if not artists:
            logger.info("no artists found")
            return

        logger.info(f"found {len(artists)} artists to check")

        if dry_run:
            logger.info("dry run mode - checking avatars without updating:")

        updated = 0
        unchanged = 0
        failed = 0
        cleared = 0

        # the semaphore bounds how many fetches run at once
        semaphore = asyncio.Semaphore(CONCURRENCY_LIMIT)

        async def process_artist(
            artist: Artist,
        ) -> tuple[str | None, Exception | None]:
            """fetch avatar for artist, return (new_url, error)."""
            async with semaphore:
                try:
                    return (await fetch_user_avatar(artist.did), None)
                except Exception as e:
                    # best-effort: one failing lookup must not abort the
                    # whole run, so the error is returned and counted below
                    return (None, e)

        # fetch all avatars concurrently (with semaphore limiting)
        logger.info("fetching avatars from bluesky...")
        results = await asyncio.gather(*[process_artist(a) for a in artists])

        # gather returns results in the same order as its inputs, so each
        # result is paired with its artist directly instead of re-scanning
        # the artist list by DID for every result
        for artist, (new_url, error) in zip(artists, results, strict=True):
            old_url = artist.avatar_url

            if error:
                failed += 1
                logger.warning(f"failed to fetch avatar for {artist.handle}: {error}")
                continue

            if old_url == new_url:
                unchanged += 1
                continue

            if new_url is None and old_url is not None:
                # NOTE(review): assumes fetch_user_avatar returns None only
                # when the profile has no avatar - if it can also return None
                # on a failed lookup, valid avatars would be cleared; confirm.
                cleared += 1
                action = "would clear" if dry_run else "clearing"
                logger.info(
                    f"{action} avatar for {artist.handle} (was: {old_url[:50]}...)"
                )
            elif new_url is not None and old_url is None:
                updated += 1
                action = "would set" if dry_run else "setting"
                logger.info(f"{action} avatar for {artist.handle}")
            else:
                updated += 1
                action = "would update" if dry_run else "updating"
                logger.info(f"{action} avatar for {artist.handle}")

            if not dry_run:
                artist.avatar_url = new_url

        if not dry_run:
            await db.commit()

        logger.info(
            f"backfill complete: {updated} updated, {cleared} cleared, "
            f"{unchanged} unchanged, {failed} failed"
        )


async def main() -> None:
    """main entry point: run the backfill, honoring a --dry-run argument."""
    dry_run = "--dry-run" in sys.argv

    if dry_run:
        logger.info("running in DRY RUN mode - no changes will be made")

    await backfill_avatars(dry_run=dry_run)


if __name__ == "__main__":
    asyncio.run(main())