feat: move sensitive images to moderation service (#544) (#644)

consolidates sensitive image management in the rust moderation service:

- adds sensitive_images table to moderation db with migration
- adds GET /sensitive-images (public), POST /admin/sensitive-images, and
POST /admin/sensitive-images/remove endpoints to the moderation service
(example calls sketched below)
- adds get_sensitive_images() method to ModerationClient
- updates backend /moderation/sensitive-images to proxy to moderation service
- adds migration script for existing data
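
for quick reference, a minimal sketch of the three endpoints from a client's
perspective. this is an illustration, not code from this PR: the base URL is
the MODERATION_SERVICE_URL default and the X-Moderation-Key header is the one
the migration script sends; the payload values are made up.

```python
# hedged sketch of the new endpoints; base URL and auth header are taken
# from the migration script's defaults, payload values are hypothetical.
import httpx

BASE = "https://moderation.plyr.fm"
ADMIN = {"X-Moderation-Key": "<MODERATION_AUTH_TOKEN>"}

# public: list all flagged images (no auth, see the auth.rs allowlist)
images = httpx.get(f"{BASE}/sensitive-images").json()
print(images["image_ids"], images["urls"])

# admin: flag an external avatar URL
added = httpx.post(
    f"{BASE}/admin/sensitive-images",
    json={"url": "https://example.com/avatar.jpg", "reason": "sensitive content"},
    headers=ADMIN,
).json()

# admin: remove the entry by its returned id
httpx.post(
    f"{BASE}/admin/sensitive-images/remove",
    json={"id": added["id"]},
    headers=ADMIN,
)
```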

closes #544

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>


Changed files
+570 -14
+31
backend/src/backend/_internal/moderation_client.py
```diff
@@
     error: str | None = None


+@dataclass
+class SensitiveImagesResult:
+    """result from fetching sensitive images."""
+
+    image_ids: list[str]
+    urls: list[str]
+
+
 class ModerationClient:
     """client for the plyr.fm moderation service.
@@
         except Exception as e:
             logger.warning("failed to emit copyright label for %s: %s", uri, e)
             return EmitLabelResult(success=False, error=str(e))
+
+    async def get_sensitive_images(self) -> SensitiveImagesResult:
+        """fetch all sensitive images from the moderation service.
+
+        returns:
+            SensitiveImagesResult with image_ids and urls
+
+        raises:
+            httpx.HTTPStatusError: on non-2xx response
+            httpx.TimeoutException: on timeout
+        """
+        async with httpx.AsyncClient(timeout=self.timeout) as client:
+            response = await client.get(
+                f"{self.labeler_url}/sensitive-images",
+                # no auth required for this public endpoint
+            )
+            response.raise_for_status()
+            data = response.json()
+
+        return SensitiveImagesResult(
+            image_ids=data.get("image_ids", []),
+            urls=data.get("urls", []),
+        )

     async def get_active_labels(self, uris: list[str]) -> set[str]:
         """check which URIs have active (non-negated) copyright-violation labels.
```
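
a hedged usage sketch for the new client method; the constructor arguments
mirror the test fixtures further down, while real callers go through
get_moderation_client(). the URL and token values here are placeholders.

```python
# sketch only: real code obtains the client via get_moderation_client();
# constructor args mirror the test fixtures, URLs/token are placeholders.
import asyncio

from backend._internal.moderation_client import ModerationClient


async def main() -> None:
    client = ModerationClient(
        service_url="https://moderation.plyr.fm",
        labeler_url="https://moderation.plyr.fm",
        auth_token="<token>",
        timeout_seconds=30,
        label_cache_prefix="label:",
        label_cache_ttl_seconds=300,
    )
    result = await client.get_sensitive_images()
    print(result.image_ids, result.urls)


asyncio.run(main())
```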
+14 -12
backend/src/backend/api/moderation.py
··· 1 1 """content moderation api endpoints.""" 2 2 3 - from typing import Annotated 3 + import logging 4 4 5 - from fastapi import APIRouter, Depends, Request 5 + from fastapi import APIRouter, Request 6 6 from pydantic import BaseModel 7 - from sqlalchemy import select 8 - from sqlalchemy.ext.asyncio import AsyncSession 9 7 10 - from backend.models import SensitiveImage, get_db 8 + from backend._internal.moderation_client import get_moderation_client 11 9 from backend.utilities.rate_limit import limiter 10 + 11 + logger = logging.getLogger(__name__) 12 12 13 13 router = APIRouter(prefix="/moderation", tags=["moderation"]) 14 14 ··· 26 26 @limiter.limit("10/minute") 27 27 async def get_sensitive_images( 28 28 request: Request, 29 - db: Annotated[AsyncSession, Depends(get_db)], 30 29 ) -> SensitiveImagesResponse: 31 30 """get all flagged sensitive images. 32 31 32 + proxies to the moderation service which is the source of truth 33 + for sensitive image data. 34 + 33 35 returns both image_ids (for R2-stored images) and full URLs 34 36 (for external images like avatars). clients should check both. 35 37 """ 36 - result = await db.execute(select(SensitiveImage)) 37 - images = result.scalars().all() 38 + client = get_moderation_client() 39 + result = await client.get_sensitive_images() 38 40 39 - image_ids = [img.image_id for img in images if img.image_id] 40 - urls = [img.url for img in images if img.url] 41 - 42 - return SensitiveImagesResponse(image_ids=image_ids, urls=urls) 41 + return SensitiveImagesResponse( 42 + image_ids=result.image_ids, 43 + urls=result.urls, 44 + )
+83 -1
backend/tests/test_moderation.py
```diff
@@
 import httpx
 import pytest
+from fastapi.testclient import TestClient
 from sqlalchemy import select
 from sqlalchemy.ext.asyncio import AsyncSession
@@
     get_active_copyright_labels,
     scan_track_for_copyright,
 )
-from backend._internal.moderation_client import ModerationClient, ScanResult
+from backend._internal.moderation_client import (
+    ModerationClient,
+    ScanResult,
+    SensitiveImagesResult,
+)
 from backend.models import Artist, CopyrightScan, Track
@@
     # scan2 should still be flagged
     assert scan2.is_flagged is True
+
+
+# tests for sensitive images
+
+
+async def test_moderation_client_get_sensitive_images() -> None:
+    """test ModerationClient.get_sensitive_images() with successful response."""
+    mock_response = Mock()
+    mock_response.json.return_value = {
+        "image_ids": ["abc123", "def456"],
+        "urls": ["https://example.com/image.jpg"],
+    }
+    mock_response.raise_for_status.return_value = None
+
+    client = ModerationClient(
+        service_url="https://test.example.com",
+        labeler_url="https://labeler.example.com",
+        auth_token="test-token",
+        timeout_seconds=30,
+        label_cache_prefix="test:label:",
+        label_cache_ttl_seconds=300,
+    )
+
+    with patch("httpx.AsyncClient.get", new_callable=AsyncMock) as mock_get:
+        mock_get.return_value = mock_response
+
+        result = await client.get_sensitive_images()
+
+        assert result.image_ids == ["abc123", "def456"]
+        assert result.urls == ["https://example.com/image.jpg"]
+        mock_get.assert_called_once()
+
+
+async def test_moderation_client_get_sensitive_images_empty() -> None:
+    """test ModerationClient.get_sensitive_images() with empty response."""
+    mock_response = Mock()
+    mock_response.json.return_value = {"image_ids": [], "urls": []}
+    mock_response.raise_for_status.return_value = None
+
+    client = ModerationClient(
+        service_url="https://test.example.com",
+        labeler_url="https://labeler.example.com",
+        auth_token="test-token",
+        timeout_seconds=30,
+        label_cache_prefix="test:label:",
+        label_cache_ttl_seconds=300,
+    )
+
+    with patch("httpx.AsyncClient.get", new_callable=AsyncMock) as mock_get:
+        mock_get.return_value = mock_response
+
+        result = await client.get_sensitive_images()
+
+        assert result.image_ids == []
+        assert result.urls == []
+
+
+async def test_get_sensitive_images_endpoint(
+    client: TestClient,
+) -> None:
+    """test GET /moderation/sensitive-images endpoint proxies to moderation service."""
+    mock_result = SensitiveImagesResult(
+        image_ids=["image1", "image2"],
+        urls=["https://example.com/avatar.jpg"],
+    )
+
+    with patch("backend.api.moderation.get_moderation_client") as mock_get_client:
+        mock_client = AsyncMock()
+        mock_client.get_sensitive_images.return_value = mock_result
+        mock_get_client.return_value = mock_client
+
+        response = client.get("/moderation/sensitive-images")
+
+        assert response.status_code == 200
+        data = response.json()
+        assert data["image_ids"] == ["image1", "image2"]
+        assert data["urls"] == ["https://example.com/avatar.jpg"]
```
+90
moderation/src/admin.rs
```diff
@@
     pub active_uris: Vec<String>,
 }

+/// Request to add a sensitive image.
+#[derive(Debug, Deserialize)]
+pub struct AddSensitiveImageRequest {
+    /// R2 storage ID (for track/album artwork)
+    pub image_id: Option<String>,
+    /// Full URL (for external images like avatars)
+    pub url: Option<String>,
+    /// Why this image was flagged
+    pub reason: Option<String>,
+    /// Admin who flagged it
+    pub flagged_by: Option<String>,
+}
+
+/// Response after adding a sensitive image.
+#[derive(Debug, Serialize)]
+pub struct AddSensitiveImageResponse {
+    pub id: i64,
+    pub message: String,
+}
+
+/// Request to remove a sensitive image.
+#[derive(Debug, Deserialize)]
+pub struct RemoveSensitiveImageRequest {
+    pub id: i64,
+}
+
+/// Response after removing a sensitive image.
+#[derive(Debug, Serialize)]
+pub struct RemoveSensitiveImageResponse {
+    pub removed: bool,
+    pub message: String,
+}
+
 /// List all flagged tracks - returns JSON for API, HTML for htmx.
 pub async fn list_flagged(
     State(state): State<AppState>,
@@
     Ok(Json(StoreContextResponse {
         message: format!("context stored for {}", request.uri),
     }))
+}
+
+/// Add a sensitive image entry.
+pub async fn add_sensitive_image(
+    State(state): State<AppState>,
+    Json(request): Json<AddSensitiveImageRequest>,
+) -> Result<Json<AddSensitiveImageResponse>, AppError> {
+    let db = state.db.as_ref().ok_or(AppError::LabelerNotConfigured)?;
+
+    // Validate: at least one of image_id or url must be provided
+    if request.image_id.is_none() && request.url.is_none() {
+        return Err(AppError::BadRequest(
+            "at least one of image_id or url must be provided".to_string(),
+        ));
+    }
+
+    tracing::info!(
+        image_id = ?request.image_id,
+        url = ?request.url,
+        reason = ?request.reason,
+        flagged_by = ?request.flagged_by,
+        "adding sensitive image"
+    );
+
+    let id = db
+        .add_sensitive_image(
+            request.image_id.as_deref(),
+            request.url.as_deref(),
+            request.reason.as_deref(),
+            request.flagged_by.as_deref(),
+        )
+        .await?;
+
+    Ok(Json(AddSensitiveImageResponse {
+        id,
+        message: "sensitive image added".to_string(),
+    }))
+}
+
+/// Remove a sensitive image entry.
+pub async fn remove_sensitive_image(
+    State(state): State<AppState>,
+    Json(request): Json<RemoveSensitiveImageRequest>,
+) -> Result<Json<RemoveSensitiveImageResponse>, AppError> {
+    let db = state.db.as_ref().ok_or(AppError::LabelerNotConfigured)?;
+
+    tracing::info!(id = request.id, "removing sensitive image");
+
+    let removed = db.remove_sensitive_image(request.id).await?;
+
+    let message = if removed {
+        format!("sensitive image {} removed", request.id)
+    } else {
+        format!("sensitive image {} not found", request.id)
+    };
+
+    Ok(Json(RemoveSensitiveImageResponse { removed, message }))
 }

 /// Serve the admin UI HTML from static file.
```
+1
moderation/src/auth.rs
```diff
@@
     // Static files must be public for admin UI CSS/JS to load
     if path == "/"
         || path == "/health"
+        || path == "/sensitive-images"
         || path == "/admin"
         || path.starts_with("/static/")
         || path.starts_with("/xrpc/com.atproto.label.")
```
+85 -1
moderation/src/db.rs
```diff
@@
 use chrono::{DateTime, Utc};
 use serde::{Deserialize, Serialize};
-use sqlx::{postgres::PgPoolOptions, PgPool};
+use sqlx::{postgres::PgPoolOptions, FromRow, PgPool};

 use crate::admin::FlaggedTrack;
 use crate::labels::Label;
+
+/// Sensitive image record from the database.
+#[derive(Debug, Clone, FromRow, Serialize, Deserialize)]
+pub struct SensitiveImageRow {
+    pub id: i64,
+    /// R2 storage ID (for track/album artwork)
+    pub image_id: Option<String>,
+    /// Full URL (for external images like avatars)
+    pub url: Option<String>,
+    /// Why this image was flagged
+    pub reason: Option<String>,
+    /// When the image was flagged
+    pub flagged_at: DateTime<Utc>,
+    /// Admin who flagged it
+    pub flagged_by: Option<String>,
+}

 /// Type alias for context row from database query.
 type ContextRow = (
@@
             .execute(&self.pool)
             .await?;
         sqlx::query("ALTER TABLE label_context ADD COLUMN IF NOT EXISTS resolution_notes TEXT")
+            .execute(&self.pool)
+            .await?;
+
+        // Sensitive images table for content moderation
+        sqlx::query(
+            r#"
+            CREATE TABLE IF NOT EXISTS sensitive_images (
+                id BIGSERIAL PRIMARY KEY,
+                image_id TEXT,
+                url TEXT,
+                reason TEXT,
+                flagged_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+                flagged_by TEXT
+            )
+            "#,
+        )
+        .execute(&self.pool)
+        .await?;
+
+        sqlx::query("CREATE INDEX IF NOT EXISTS idx_sensitive_images_image_id ON sensitive_images(image_id)")
+            .execute(&self.pool)
+            .await?;
+        sqlx::query("CREATE INDEX IF NOT EXISTS idx_sensitive_images_url ON sensitive_images(url)")
             .execute(&self.pool)
             .await?;
@@
             .collect();

         Ok(tracks)
+    }
+
+    // -------------------------------------------------------------------------
+    // Sensitive images
+    // -------------------------------------------------------------------------
+
+    /// Get all sensitive images.
+    pub async fn get_sensitive_images(&self) -> Result<Vec<SensitiveImageRow>, sqlx::Error> {
+        sqlx::query_as::<_, SensitiveImageRow>(
+            "SELECT id, image_id, url, reason, flagged_at, flagged_by FROM sensitive_images ORDER BY flagged_at DESC",
+        )
+        .fetch_all(&self.pool)
+        .await
+    }
+
+    /// Add a sensitive image entry.
+    pub async fn add_sensitive_image(
+        &self,
+        image_id: Option<&str>,
+        url: Option<&str>,
+        reason: Option<&str>,
+        flagged_by: Option<&str>,
+    ) -> Result<i64, sqlx::Error> {
+        sqlx::query_scalar::<_, i64>(
+            r#"
+            INSERT INTO sensitive_images (image_id, url, reason, flagged_by)
+            VALUES ($1, $2, $3, $4)
+            RETURNING id
+            "#,
+        )
+        .bind(image_id)
+        .bind(url)
+        .bind(reason)
+        .bind(flagged_by)
+        .fetch_one(&self.pool)
+        .await
+    }
+
+    /// Remove a sensitive image entry by ID.
+    pub async fn remove_sensitive_image(&self, id: i64) -> Result<bool, sqlx::Error> {
+        let result = sqlx::query("DELETE FROM sensitive_images WHERE id = $1")
+            .bind(id)
+            .execute(&self.pool)
+            .await?;
+        Ok(result.rows_affected() > 0)
     }
 }
```
+26
moderation/src/handlers.rs
```diff
@@
     pub label: Label,
 }

+/// Response for sensitive images endpoint.
+#[derive(Debug, Serialize)]
+pub struct SensitiveImagesResponse {
+    /// R2 image IDs (for track/album artwork)
+    pub image_ids: Vec<String>,
+    /// Full URLs (for external images like avatars)
+    pub urls: Vec<String>,
+}
+
 // --- handlers ---

 /// Health check endpoint.
@@
     }

     Ok(Json(EmitLabelResponse { seq, label }))
+}
+
+/// Get all sensitive images (public endpoint).
+///
+/// Returns image_ids (R2 storage IDs) and urls (full URLs) for all flagged images.
+/// Clients should check both lists when determining if an image is sensitive.
+pub async fn get_sensitive_images(
+    State(state): State<AppState>,
+) -> Result<Json<SensitiveImagesResponse>, AppError> {
+    let db = state.db.as_ref().ok_or(AppError::LabelerNotConfigured)?;
+
+    let images = db.get_sensitive_images().await?;
+
+    let image_ids: Vec<String> = images.iter().filter_map(|i| i.image_id.clone()).collect();
+    let urls: Vec<String> = images.iter().filter_map(|i| i.url.clone()).collect();
+
+    Ok(Json(SensitiveImagesResponse { image_ids, urls }))
 }

 #[cfg(test)]
```
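
the handler docstring's "check both lists" contract, spelled out from the
consumer side. the helper below is hypothetical; only the field names come
from SensitiveImagesResponse.

```python
# hypothetical helper illustrating the "check both lists" contract from the
# handler docstring: an image is sensitive if its R2 id OR its URL is listed.
def is_sensitive(image_id: str | None, url: str | None, data: dict) -> bool:
    if image_id is not None and image_id in data["image_ids"]:
        return True
    if url is not None and url in data["urls"]:
        return True
    return False


data = {"image_ids": ["abc123"], "urls": ["https://example.com/avatar.jpg"]}
assert is_sensitive("abc123", None, data)
assert is_sensitive(None, "https://example.com/avatar.jpg", data)
assert not is_sensitive("other", "https://example.com/x.jpg", data)
```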
+7
moderation/src/main.rs
```diff
@@
         .route("/", get(handlers::landing))
         // Health check
         .route("/health", get(handlers::health))
+        // Sensitive images (public)
+        .route("/sensitive-images", get(handlers::get_sensitive_images))
         // AuDD scanning
         .route("/scan", post(audd::scan))
         // Label emission (internal API)
@@
         .route("/admin/resolve-htmx", post(admin::resolve_flag_htmx))
         .route("/admin/context", post(admin::store_context))
         .route("/admin/active-labels", post(admin::get_active_labels))
+        .route("/admin/sensitive-images", post(admin::add_sensitive_image))
+        .route(
+            "/admin/sensitive-images/remove",
+            post(admin::remove_sensitive_image),
+        )
         // Static files (CSS, JS for admin UI)
         .nest_service("/static", ServeDir::new("static"))
         // ATProto XRPC endpoints (public)
```
+4
moderation/src/state.rs
```diff
@@
     #[error("labeler not configured")]
     LabelerNotConfigured,

+    #[error("bad request: {0}")]
+    BadRequest(String),
+
     #[error("label error: {0}")]
     Label(#[from] LabelError),
@@
             AppError::LabelerNotConfigured => {
                 (StatusCode::SERVICE_UNAVAILABLE, "LabelerNotConfigured")
             }
+            AppError::BadRequest(_) => (StatusCode::BAD_REQUEST, "BadRequest"),
             AppError::Label(_) => (StatusCode::INTERNAL_SERVER_ERROR, "LabelError"),
             AppError::Database(_) => (StatusCode::INTERNAL_SERVER_ERROR, "DatabaseError"),
             AppError::Io(_) => (StatusCode::INTERNAL_SERVER_ERROR, "IoError"),
```
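
one observable effect of the new variant: the image_id/url validation in
admin.rs now surfaces to clients as HTTP 400. a sketch of that behavior,
with host and token as placeholders (the header name is the one the
migration script uses):

```python
# sketch: posting neither image_id nor url should return 400 BadRequest
# via the new AppError variant. host and token are placeholders.
import httpx

resp = httpx.post(
    "https://<moderation-host>/admin/sensitive-images",
    json={},  # fails the "at least one of image_id or url" check
    headers={"X-Moderation-Key": "<token>"},
)
assert resp.status_code == 400
```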
+229
scripts/migrate_sensitive_images.py
```python
#!/usr/bin/env -S uv run --script --quiet --with-editable=backend
# /// script
# requires-python = ">=3.12"
# dependencies = [
#     "httpx",
#     "pydantic-settings",
#     "asyncpg",
#     "sqlalchemy[asyncio]",
# ]
# ///
"""migrate sensitive images from backend database to moderation service.

this script reads sensitive images from the backend database and creates
them in the moderation service. after migration, the backend will proxy
sensitive image requests to the moderation service.

usage:
    uv run scripts/migrate_sensitive_images.py --env prod --dry-run
    uv run scripts/migrate_sensitive_images.py --env prod

environment variables (set in .env or export):
    PROD_DATABASE_URL - production database connection string
    STAGING_DATABASE_URL - staging database connection string
    DEV_DATABASE_URL - development database connection string
    MODERATION_SERVICE_URL - URL of moderation service
    MODERATION_AUTH_TOKEN - auth token for moderation service
"""

import argparse
import asyncio
import os
from typing import Literal

import httpx
from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict
from sqlalchemy import text
from sqlalchemy.ext.asyncio import create_async_engine

Environment = Literal["dev", "staging", "prod"]


class MigrationSettings(BaseSettings):
    """settings for migration script."""

    model_config = SettingsConfigDict(
        env_file=".env",
        case_sensitive=False,
        extra="ignore",
    )

    dev_database_url: str = Field(default="", validation_alias="DEV_DATABASE_URL")
    staging_database_url: str = Field(
        default="", validation_alias="STAGING_DATABASE_URL"
    )
    prod_database_url: str = Field(default="", validation_alias="PROD_DATABASE_URL")

    moderation_service_url: str = Field(
        default="https://moderation.plyr.fm",
        validation_alias="MODERATION_SERVICE_URL",
    )
    moderation_auth_token: str = Field(
        default="", validation_alias="MODERATION_AUTH_TOKEN"
    )

    def get_database_url(self, env: Environment) -> str:
        """get database URL for environment."""
        urls = {
            "dev": self.dev_database_url,
            "staging": self.staging_database_url,
            "prod": self.prod_database_url,
        }
        url = urls.get(env, "")
        if not url:
            raise ValueError(f"no database URL configured for {env}")
        # ensure asyncpg driver is used
        if url.startswith("postgresql://"):
            url = url.replace("postgresql://", "postgresql+asyncpg://", 1)
        return url

    def get_moderation_url(self, env: Environment) -> str:
        """get moderation service URL for environment."""
        if env == "dev":
            return os.getenv("DEV_MODERATION_URL", "http://localhost:8002")
        elif env == "staging":
            return os.getenv("STAGING_MODERATION_URL", "https://moderation-stg.plyr.fm")
        else:
            return self.moderation_service_url


async def fetch_sensitive_images(db_url: str) -> list[dict]:
    """fetch all sensitive images from backend database."""
    engine = create_async_engine(db_url)

    async with engine.begin() as conn:
        result = await conn.execute(
            text(
                """
                SELECT id, image_id, url, reason, flagged_at, flagged_by
                FROM sensitive_images
                ORDER BY id
                """
            )
        )
        rows = result.fetchall()

    await engine.dispose()

    return [
        {
            "id": row[0],
            "image_id": row[1],
            "url": row[2],
            "reason": row[3],
            "flagged_at": row[4].isoformat() if row[4] else None,
            "flagged_by": row[5],
        }
        for row in rows
    ]


async def migrate_to_moderation_service(
    images: list[dict],
    moderation_url: str,
    auth_token: str,
    dry_run: bool = False,
) -> tuple[int, int]:
    """migrate images to moderation service.

    returns:
        tuple of (success_count, error_count)
    """
    success_count = 0
    error_count = 0

    headers = {"X-Moderation-Key": auth_token}

    async with httpx.AsyncClient(timeout=30.0) as client:
        for image in images:
            payload = {
                "image_id": image["image_id"],
                "url": image["url"],
                "reason": image["reason"],
                "flagged_by": image["flagged_by"],
            }

            if dry_run:
                print(f"  [dry-run] would migrate: {payload}")
                success_count += 1
                continue

            try:
                response = await client.post(
                    f"{moderation_url}/admin/sensitive-images",
                    json=payload,
                    headers=headers,
                )
                response.raise_for_status()
                result = response.json()
                print(f"  migrated id={image['id']} -> moderation id={result['id']}")
                success_count += 1
            except Exception as e:
                print(f"  ERROR migrating id={image['id']}: {e}")
                error_count += 1

    return success_count, error_count


async def main() -> None:
    parser = argparse.ArgumentParser(
        description="migrate sensitive images to moderation service"
    )
    parser.add_argument(
        "--env",
        choices=["dev", "staging", "prod"],
        required=True,
        help="environment to migrate",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="show what would be migrated without making changes",
    )
    args = parser.parse_args()

    settings = MigrationSettings()

    print(f"migrating sensitive images for {args.env}")
    if args.dry_run:
        print("(dry run - no changes will be made)")

    # fetch from backend database
    db_url = settings.get_database_url(args.env)
    print("\nfetching from backend database...")
    images = await fetch_sensitive_images(db_url)
    print(f"found {len(images)} sensitive images")

    if not images:
        print("nothing to migrate")
        return

    # migrate to moderation service
    moderation_url = settings.get_moderation_url(args.env)
    print(f"\nmigrating to moderation service at {moderation_url}...")

    if not settings.moderation_auth_token and not args.dry_run:
        print("ERROR: MODERATION_AUTH_TOKEN not set")
        return

    success, errors = await migrate_to_moderation_service(
        images,
        moderation_url,
        settings.moderation_auth_token,
        dry_run=args.dry_run,
    )

    print(f"\ndone: {success} migrated, {errors} errors")

    if not args.dry_run and errors == 0:
        print(
            "\nnext steps:\n"
            "  1. verify data in moderation service: GET /sensitive-images\n"
            "  2. update backend to proxy to moderation service\n"
            "  3. optionally drop sensitive_images table from backend db"
        )


if __name__ == "__main__":
    asyncio.run(main())
```