#!/usr/bin/env -S uv run --script --quiet
"""backfill image_url for tracks with image_id but missing image_url.

## Context

PR #184 added an image_url column to the tracks table to eliminate N+1 R2 API
calls. New uploads automatically populate image_url at creation time, but 15
legacy tracks uploaded before the PR still have image_url = NULL. This makes
the /tracks/liked endpoint slow, because we fall back to calling
track.get_image_url(), which hits the R2 API on every request.

## What This Script Does

1. Finds all tracks with image_id but NULL image_url
2. Computes image_url by calling storage.get_url(image_id)
3. Updates the database with the computed URLs
4. Runs the URL computations concurrently for performance

## Usage

```bash
# dry run (show what would be updated)
uv run scripts/backfill_image_urls.py --dry-run

# actually update the database
uv run scripts/backfill_image_urls.py

# target a specific environment
DATABASE_URL=postgresql://... uv run scripts/backfill_image_urls.py
```
"""

import asyncio
import logging
import sys
from pathlib import Path

# add src to path so we can import backend modules
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))

from sqlalchemy import select

from backend.config import settings
from backend.models import Track
from backend.storage import storage
from backend.storage.r2 import R2Storage
from backend.utilities.database import db_session

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)


async def backfill_image_urls(dry_run: bool = False) -> None:
    """backfill image_url for tracks with image_id but missing image_url."""
    if not isinstance(storage, R2Storage):
        logger.error("storage backend is not R2, cannot compute image URLs")
        return

    logger.info(f"storage backend: {settings.storage.backend}")

    async with db_session() as db:
        # find tracks with image_id but no image_url
        stmt = select(Track).where(
            Track.image_id.isnot(None), Track.image_url.is_(None)
        )
        result = await db.execute(stmt)
        tracks = result.scalars().all()

        if not tracks:
            logger.info("no tracks need backfilling")
            return

        logger.info(f"found {len(tracks)} tracks needing image_url backfill")

        if dry_run:
            logger.info("dry run mode - showing tracks that would be updated:")
            for track in tracks:
                logger.info(
                    f"  track {track.id}: {track.title} (image_id: {track.image_id})"
                )
            return

        # compute image URLs concurrently
        logger.info("computing image URLs from R2...")

        async def compute_and_update(
            track: Track,
        ) -> tuple[int, str | None, Exception | None]:
            """compute image_url for a track and return (track_id, url, error)."""
            try:
                url = await storage.get_url(track.image_id, file_type="image")
                return (track.id, url, None)
            except Exception as e:
                return (track.id, None, e)

        results = await asyncio.gather(*[compute_and_update(t) for t in tracks])

        # update database with computed URLs
        tracks_by_id = {t.id: t for t in tracks}
        updated = 0
        failed = 0
        for track_id, image_url, error in results:
            track = tracks_by_id[track_id]
            if image_url:
                track.image_url = image_url
                updated += 1
                logger.info(f"updated track {track_id}: {track.title}")
            else:
                failed += 1
                logger.error(
                    f"failed to compute URL for track {track_id} ({track.title}): {error}"
                )

        await db.commit()
        logger.info(f"backfill complete: {updated} updated, {failed} failed")


async def main() -> None:
    """main entry point."""
    dry_run = "--dry-run" in sys.argv
    if dry_run:
        logger.info("running in DRY RUN mode - no changes will be made")
    await backfill_image_urls(dry_run=dry_run)
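
# note on concurrency: the asyncio.gather call above is unbounded, which is
# fine for the ~15 legacy tracks this script targets. if the backlog were ever
# large, a semaphore-bounded variant (a sketch, reusing compute_and_update as
# defined above) would cap the number of in-flight R2 calls:
#
#     sem = asyncio.Semaphore(10)
#
#     async def bounded(track: Track) -> tuple[int, str | None, Exception | None]:
#         async with sem:
#             return await compute_and_update(track)
#
#     results = await asyncio.gather(*[bounded(t) for t in tracks])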

if __name__ == "__main__":
    asyncio.run(main())
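
# example: verifying the backfill afterwards (a sketch, assuming the same Track
# model and db_session helper used above; run from an async context):
#
#     async with db_session() as db:
#         result = await db.execute(
#             select(Track).where(Track.image_id.isnot(None), Track.image_url.is_(None))
#         )
#         assert not result.scalars().all(), "some tracks were not backfilled"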