feat: add automated image moderation via Claude vision (#687)

adds automated scanning of uploaded images (track covers, album covers)
for policy violations using Claude Sonnet's vision capabilities.

## moderation service (rust)
- new `/scan-image` endpoint accepts a multipart form with `image` + `image_id` fields (example request below)
- `claude.rs`: Claude API client with vision support
- `image_scans` table for cost tracking and audit trail
- auto-flags unsafe images to `sensitive_images` table
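
for reference, a raw call to the new endpoint looks roughly like this. a minimal sketch: the service URL is a placeholder and auth headers are omitted (the real `ModerationClient` attaches whatever its `_headers()` returns); the response fields match `ScanImageResponse` in `handlers.rs` below.

```python
# hypothetical direct call to the moderation service's /scan-image endpoint.
# MODERATION_URL is an assumed local dev address, not part of this PR.
import asyncio

import httpx

MODERATION_URL = "http://localhost:8080"  # assumption: local dev address


async def scan(path: str, image_id: str) -> None:
    with open(path, "rb") as f:
        image_bytes = f.read()
    async with httpx.AsyncClient(timeout=30.0) as client:
        response = await client.post(
            f"{MODERATION_URL}/scan-image",
            files={"image": (image_id, image_bytes, "image/png")},
            data={"image_id": image_id},
        )
    response.raise_for_status()
    result = response.json()
    # response shape: is_safe, reason, severity, violated_categories
    print(result["is_safe"], result["severity"], result["violated_categories"])


asyncio.run(scan("cover.png", "some-r2-file-id"))
```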

## backend (python)
- `ModerationClient.scan_image()` method for calling the new endpoint
- integration in `upload_track_image()` and album cover upload
- `image_moderation_enabled` setting (default: true)

moderation is best-effort - failures are logged but don't block uploads.
flagged images are blurred in the UI (existing sensitive_images behavior).
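
a hypothetical caller following that contract might look like this, condensed from the two upload-path diffs below (`get_moderation_client()` and the result fields come from this PR's `moderation_client.py`):

```python
# hypothetical caller, condensed from the upload paths in this PR.
import logging

from backend._internal.moderation_client import get_moderation_client

logger = logging.getLogger(__name__)


async def moderate_cover(image_bytes: bytes, image_id: str) -> None:
    client = get_moderation_client()
    try:
        result = await client.scan_image(image_bytes, image_id, "image/jpeg")
    except Exception as e:
        # best-effort: a moderation outage must never fail the upload
        logger.warning("image moderation failed for %s: %s", image_id, e)
        return
    if not result.is_safe:
        # the service has already flagged the image in sensitive_images;
        # the UI blurs it, so there is nothing to roll back here
        logger.info("flagged %s (%s): %s", image_id, result.severity, result.reason)
```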

closes #166

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>


## changed files (+577 -3)
### backend/src/backend/_internal/moderation_client.py (+61)
```diff
     urls: list[str]
 
 
+@dataclass
+class ScanImageResult:
+    """result from scanning an image for policy violations."""
+
+    is_safe: bool
+    reason: str | None
+    severity: str
+    violated_categories: list[str]
+
+
 class ModerationClient:
     """client for the plyr.fm moderation service.
···
             image_ids=data.get("image_ids", []),
             urls=data.get("urls", []),
         )
+
+    async def scan_image(
+        self, image_bytes: bytes, image_id: str, content_type: str = "image/png"
+    ) -> ScanImageResult:
+        """scan an image for policy violations using claude vision.
+
+        the moderation service will:
+        - analyze the image using claude
+        - store the scan result for cost tracking
+        - automatically flag the image if it violates policy
+
+        args:
+            image_bytes: raw image bytes
+            image_id: identifier for tracking (e.g., r2 file id)
+            content_type: mime type of the image
+
+        returns:
+            ScanImageResult with moderation decision
+
+        raises:
+            httpx.HTTPStatusError: on non-2xx response
+            httpx.TimeoutException: on timeout
+        """
+        # use a longer timeout for image moderation (claude API call)
+        timeout = httpx.Timeout(30.0)
+
+        async with httpx.AsyncClient(timeout=timeout) as client:
+            response = await client.post(
+                f"{self.labeler_url}/scan-image",
+                files={"image": (image_id, image_bytes, content_type)},
+                data={"image_id": image_id},
+                headers=self._headers(),
+            )
+            response.raise_for_status()
+            data = response.json()
+
+        result = ScanImageResult(
+            is_safe=data.get("is_safe", True),
+            reason=data.get("reason"),
+            severity=data.get("severity", "safe"),
+            violated_categories=data.get("violated_categories", []),
+        )
+
+        logfire.info(
+            "image scan completed",
+            image_id=image_id,
+            is_safe=result.is_safe,
+            severity=result.severity,
+        )
+
+        return result
 
     async def get_active_labels(self, uris: list[str]) -> set[str]:
         """check which URIs have active (non-negated) copyright-violation labels.
```
### backend/src/backend/api/albums.py (+19)
```diff
 from backend._internal import Session as AuthSession
 from backend._internal import require_artist_profile
 from backend._internal.auth import get_session
+from backend._internal.moderation_client import get_moderation_client
+from backend.config import settings
 from backend.models import Album, Artist, Track, TrackLike, get_db
 from backend.schemas import TrackResponse
 from backend.storage import storage
···
     # construct R2 URL directly (images are stored under images/ prefix)
     image_url = f"{storage.public_image_bucket_url}/images/{image_id}{ext}"
+
+    # scan image for policy violations (non-blocking)
+    if settings.moderation.image_moderation_enabled:
+        try:
+            client = get_moderation_client()
+            content_type = {
+                ".jpg": "image/jpeg",
+                ".jpeg": "image/jpeg",
+                ".png": "image/png",
+                ".webp": "image/webp",
+            }.get(ext, "image/png")
+            await client.scan_image(bytes(image_data), image_id, content_type)
+            # if image is flagged, it's automatically added to sensitive_images
+            # by the moderation service. the image is still saved and returned.
+        except Exception as e:
+            # log but don't block upload - moderation is best-effort
+            logger.warning("image moderation failed for %s: %s", image_id, e)
 
     # delete old image if exists (prevent R2 object leaks)
     if album.image_id:
```
### backend/src/backend/api/tracks/metadata_service.py (+19 -1)
```diff
 
 import asyncio
 import json
+import logging
 from io import BytesIO
 from typing import TYPE_CHECKING, Any
 
···
 
 from backend._internal.atproto.handles import resolve_handle
 from backend._internal.image import ImageFormat
+from backend._internal.moderation_client import get_moderation_client
+from backend.config import settings
 from backend.models import Track
 from backend.storage import storage
 
 from .constants import MAX_FEATURES
 from .services import get_or_create_album
+
+logger = logging.getLogger(__name__)
 
 MAX_IMAGE_SIZE_BYTES = 20 * 1024 * 1024  # 20MB
 
···
     if not image.filename:
         raise HTTPException(status_code=400, detail="image filename missing")
 
-    _image_format, is_valid = ImageFormat.validate_and_extract(image.filename)
+    image_format, is_valid = ImageFormat.validate_and_extract(image.filename)
     if not is_valid:
         raise HTTPException(
             status_code=400,
···
     image_obj = BytesIO(image_data)
     image_id = await storage.save(image_obj, f"images/{image.filename}")
     image_url = await storage.get_url(image_id, file_type="image")
+
+    # scan image for policy violations (non-blocking)
+    if settings.moderation.image_moderation_enabled:
+        try:
+            client = get_moderation_client()
+            content_type = image_format.mime_type if image_format else "image/png"
+            await client.scan_image(image_data, image_id, content_type)
+            # note: if image is flagged, it's automatically added to sensitive_images
+            # by the moderation service. the image is still saved and returned -
+            # sensitive images are just blurred in the UI, not rejected.
+        except Exception as e:
+            # log but don't block upload - moderation is best-effort
+            logger.warning("image moderation failed for %s: %s", image_id, e)
 
     return image_id, image_url
```
### backend/src/backend/config.py (+4)
```diff
         default=300,
         description="TTL in seconds for cached copyright label status (default 5 min)",
     )
+    image_moderation_enabled: bool = Field(
+        default=True,
+        description="Enable image moderation via Claude vision on upload",
+    )
 
 
 class DocketSettings(AppSettingsSection):
```
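
since the flag defaults to true, tests that exercise uploads without a running moderation service will probably want it off. a hypothetical pytest fixture, assuming the `settings.moderation` section permits attribute assignment:

```python
# hypothetical fixture, not part of this PR; assumes settings.moderation
# is a mutable pydantic-settings section.
import pytest

from backend.config import settings


@pytest.fixture
def image_moderation_off(monkeypatch: pytest.MonkeyPatch):
    # monkeypatch restores the original value at teardown
    monkeypatch.setattr(settings.moderation, "image_moderation_enabled", False)
```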
### moderation/Cargo.lock (+28)
··· 85 85 "matchit", 86 86 "memchr", 87 87 "mime", 88 + "multer", 88 89 "percent-encoding", 89 90 "pin-project-lite", 90 91 "rustversion", ··· 466 467 "sec1", 467 468 "subtle", 468 469 "zeroize", 470 + ] 471 + 472 + [[package]] 473 + name = "encoding_rs" 474 + version = "0.8.35" 475 + source = "registry+https://github.com/rust-lang/crates.io-index" 476 + checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" 477 + dependencies = [ 478 + "cfg-if", 469 479 ] 470 480 471 481 [[package]] ··· 1203 1213 dependencies = [ 1204 1214 "anyhow", 1205 1215 "axum", 1216 + "base64", 1206 1217 "bytes", 1207 1218 "chrono", 1208 1219 "futures", ··· 1221 1232 "tower-http", 1222 1233 "tracing", 1223 1234 "tracing-subscriber", 1235 + ] 1236 + 1237 + [[package]] 1238 + name = "multer" 1239 + version = "3.1.0" 1240 + source = "registry+https://github.com/rust-lang/crates.io-index" 1241 + checksum = "83e87776546dc87511aa5ee218730c92b666d7264ab6ed41f9d215af9cd5224b" 1242 + dependencies = [ 1243 + "bytes", 1244 + "encoding_rs", 1245 + "futures-util", 1246 + "http", 1247 + "httparse", 1248 + "memchr", 1249 + "mime", 1250 + "spin", 1251 + "version_check", 1224 1252 ] 1225 1253 1226 1254 [[package]]
### moderation/Cargo.toml (+2 -1)
```diff
 
 [dependencies]
 anyhow = "1.0"
-axum = { version = "0.7", features = ["macros", "json", "ws"] }
+axum = { version = "0.7", features = ["macros", "json", "ws", "multipart"] }
+base64 = "0.22"
 rand = "0.8"
 bytes = "1.0"
 chrono = { version = "0.4", features = ["serde"] }
```
### moderation/src/claude.rs (new file, +193)
```rust
//! Claude API client for image moderation using structured outputs.

use base64::{engine::general_purpose::STANDARD, Engine};
use serde::{Deserialize, Serialize};
use tracing::info;

const CLAUDE_API_URL: &str = "https://api.anthropic.com/v1/messages";
const ANTHROPIC_VERSION: &str = "2023-06-01";
const STRUCTURED_OUTPUTS_BETA: &str = "structured-outputs-2025-11-13";

/// Result of image moderation analysis.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModerationResult {
    pub is_safe: bool,
    pub violated_categories: Vec<String>,
    pub severity: String,
    pub explanation: String,
}

/// Claude API client for image moderation.
pub struct ClaudeClient {
    api_key: String,
    model: String,
    http: reqwest::Client,
}

impl ClaudeClient {
    pub fn new(api_key: String, model: Option<String>) -> Self {
        Self {
            api_key,
            model: model.unwrap_or_else(|| "claude-sonnet-4-5-20250514".to_string()),
            http: reqwest::Client::new(),
        }
    }

    /// Analyze an image for policy violations using structured outputs.
    pub async fn analyze_image(
        &self,
        image_bytes: &[u8],
        media_type: &str,
    ) -> anyhow::Result<ModerationResult> {
        let b64 = STANDARD.encode(image_bytes);

        // Build request with structured output schema
        let request = serde_json::json!({
            "model": self.model,
            "max_tokens": 1024,
            "messages": [{
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": MODERATION_PROMPT
                    },
                    {
                        "type": "image",
                        "source": {
                            "type": "base64",
                            "media_type": media_type,
                            "data": b64
                        }
                    }
                ]
            }],
            // Structured output schema - guarantees valid JSON matching this schema
            "output_format": {
                "type": "json_schema",
                "schema": {
                    "type": "object",
                    "properties": {
                        "is_safe": {
                            "type": "boolean",
                            "description": "Whether the image passes moderation"
                        },
                        "violated_categories": {
                            "type": "array",
                            "items": { "type": "string" },
                            "description": "List of policy categories violated, empty if safe"
                        },
                        "severity": {
                            "type": "string",
                            "enum": ["safe", "low", "medium", "high"],
                            "description": "Severity level of the violation"
                        },
                        "explanation": {
                            "type": "string",
                            "description": "Brief explanation of the moderation decision"
                        }
                    },
                    "required": ["is_safe", "violated_categories", "severity", "explanation"],
                    "additionalProperties": false
                }
            }
        });

        info!(model = %self.model, "analyzing image with structured outputs");

        let response = self
            .http
            .post(CLAUDE_API_URL)
            .header("x-api-key", &self.api_key)
            .header("anthropic-version", ANTHROPIC_VERSION)
            .header("anthropic-beta", STRUCTURED_OUTPUTS_BETA)
            .header("content-type", "application/json")
            .json(&request)
            .send()
            .await?;

        if !response.status().is_success() {
            let status = response.status();
            let body = response.text().await.unwrap_or_default();
            anyhow::bail!("claude API error {status}: {body}");
        }

        let response: ClaudeResponse = response.json().await?;

        // Check for refusal
        if response.stop_reason == Some("refusal".to_string()) {
            anyhow::bail!("claude refused to analyze the image");
        }

        // Check for max_tokens cutoff
        if response.stop_reason == Some("max_tokens".to_string()) {
            anyhow::bail!("response was cut off due to max_tokens limit");
        }

        // Extract text content - guaranteed to be valid JSON matching our schema
        let text = response
            .content
            .into_iter()
            .find_map(|block| {
                if block.content_type == "text" {
                    block.text
                } else {
                    None
                }
            })
            .ok_or_else(|| anyhow::anyhow!("no text content in response"))?;

        // Direct JSON parse - no string manipulation needed thanks to structured outputs
        serde_json::from_str(&text)
            .map_err(|e| anyhow::anyhow!("failed to parse structured output: {e}"))
    }
}

#[derive(Debug, Deserialize)]
struct ClaudeResponse {
    content: Vec<ContentBlock>,
    stop_reason: Option<String>,
}

#[derive(Debug, Deserialize)]
struct ContentBlock {
    #[serde(rename = "type")]
    content_type: String,
    text: Option<String>,
}

const MODERATION_PROMPT: &str = r#"You are a content moderator for a music streaming platform. Analyze the provided image (album/track cover art) for policy violations.

Check for:
1. Explicit sexual content (nudity, pornography)
2. Extreme violence or gore
3. Hate symbols or content
4. Illegal content
5. Graphic drug use imagery

Note: Artistic nudity in album art (like classic rock covers) may be acceptable if not explicit/pornographic.

Analyze the image and provide your moderation decision."#;

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_parse_safe_response() {
        let response = r#"{"is_safe": true, "violated_categories": [], "severity": "safe", "explanation": "Normal album artwork"}"#;
        let result: ModerationResult = serde_json::from_str(response).unwrap();
        assert!(result.is_safe);
        assert!(result.violated_categories.is_empty());
        assert_eq!(result.severity, "safe");
    }

    #[test]
    fn test_parse_unsafe_response() {
        let response = r#"{"is_safe": false, "violated_categories": ["explicit_sexual"], "severity": "high", "explanation": "Contains explicit nudity"}"#;
        let result: ModerationResult = serde_json::from_str(response).unwrap();
        assert!(!result.is_safe);
        assert_eq!(result.violated_categories, vec!["explicit_sexual"]);
        assert_eq!(result.severity, "high");
    }
}
```
### moderation/src/config.rs (+12)
```diff
     pub database_url: Option<String>,
     pub labeler_did: Option<String>,
     pub labeler_signing_key: Option<String>,
+    /// Anthropic API key for Claude image moderation
+    pub claude_api_key: Option<String>,
+    /// Claude model to use (default: claude-sonnet-4-5-20250514)
+    pub claude_model: String,
 }
 
 impl Config {
···
             database_url: env::var("MODERATION_DATABASE_URL").ok(),
             labeler_did: env::var("MODERATION_LABELER_DID").ok(),
             labeler_signing_key: env::var("MODERATION_LABELER_SIGNING_KEY").ok(),
+            claude_api_key: env::var("ANTHROPIC_API_KEY").ok(),
+            claude_model: env::var("MODERATION_CLAUDE_MODEL")
+                .unwrap_or_else(|_| "claude-sonnet-4-5-20250514".to_string()),
         })
+    }
+
+    /// Check if Claude image moderation is enabled.
+    pub fn claude_enabled(&self) -> bool {
+        self.claude_api_key.is_some() && self.database_url.is_some()
     }
 
     /// Check if labeler is fully configured.
```
### moderation/src/db.rs (+86)
```diff
         .execute(&self.pool)
         .await?;
 
+        // Image scans table for tracking automated moderation
+        sqlx::query(
+            r#"
+            CREATE TABLE IF NOT EXISTS image_scans (
+                id BIGSERIAL PRIMARY KEY,
+                image_id TEXT NOT NULL,
+                is_safe BOOLEAN NOT NULL,
+                violated_categories JSONB,
+                severity TEXT,
+                explanation TEXT,
+                scanned_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+                model TEXT
+            )
+            "#,
+        )
+        .execute(&self.pool)
+        .await?;
+
+        sqlx::query("CREATE INDEX IF NOT EXISTS idx_image_scans_image_id ON image_scans(image_id)")
+            .execute(&self.pool)
+            .await?;
+        sqlx::query("CREATE INDEX IF NOT EXISTS idx_image_scans_is_safe ON image_scans(is_safe)")
+            .execute(&self.pool)
+            .await?;
+
         // Review batches for mobile-friendly flag review
         sqlx::query(
             r#"
···
         .await?;
         Ok(result.rows_affected() > 0)
     }
+
+    // -------------------------------------------------------------------------
+    // Image scans
+    // -------------------------------------------------------------------------
+
+    /// Store an image scan result.
+    pub async fn store_image_scan(
+        &self,
+        image_id: &str,
+        is_safe: bool,
+        violated_categories: &[String],
+        severity: &str,
+        explanation: &str,
+        model: &str,
+    ) -> Result<i64, sqlx::Error> {
+        let categories_json = serde_json::to_value(violated_categories).unwrap_or_default();
+        sqlx::query_scalar::<_, i64>(
+            r#"
+            INSERT INTO image_scans (image_id, is_safe, violated_categories, severity, explanation, model)
+            VALUES ($1, $2, $3, $4, $5, $6)
+            RETURNING id
+            "#,
+        )
+        .bind(image_id)
+        .bind(is_safe)
+        .bind(categories_json)
+        .bind(severity)
+        .bind(explanation)
+        .bind(model)
+        .fetch_one(&self.pool)
+        .await
+    }
+
+    /// Get image scan stats for cost tracking.
+    pub async fn get_image_scan_stats(&self) -> Result<ImageScanStats, sqlx::Error> {
+        let row: (i64, i64, i64) = sqlx::query_as(
+            r#"
+            SELECT
+                COUNT(*) as total,
+                COUNT(*) FILTER (WHERE is_safe = true) as safe,
+                COUNT(*) FILTER (WHERE is_safe = false) as flagged
+            FROM image_scans
+            "#,
+        )
+        .fetch_one(&self.pool)
+        .await?;
+
+        Ok(ImageScanStats {
+            total: row.0,
+            safe: row.1,
+            flagged: row.2,
+        })
+    }
+}
+
+/// Statistics for image scans.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ImageScanStats {
+    pub total: i64,
+    pub safe: i64,
+    pub flagged: i64,
 }
 
 impl LabelRow {
```
### moderation/src/handlers.rs (+123 -1)
```diff
 //! HTTP request handlers for core endpoints.
 
-use axum::{extract::State, response::Html, Json};
+use axum::{
+    extract::{Multipart, State},
+    response::Html,
+    Json,
+};
 use serde::{Deserialize, Serialize};
 use tracing::info;
 
···
     let urls: Vec<String> = images.iter().filter_map(|i| i.url.clone()).collect();
 
     Ok(Json(SensitiveImagesResponse { image_ids, urls }))
+}
+
+// --- image moderation ---
+
+/// Response from image scanning endpoint.
+#[derive(Debug, Serialize)]
+pub struct ScanImageResponse {
+    pub is_safe: bool,
+    pub reason: Option<String>,
+    pub severity: String,
+    pub violated_categories: Vec<String>,
+}
+
+/// Scan an image for policy violations using Claude vision.
+///
+/// Accepts multipart form with:
+/// - `image`: the image file to scan
+/// - `image_id`: identifier for tracking (e.g., R2 file ID)
+///
+/// Returns moderation result. If image is not safe, it's automatically
+/// added to the sensitive_images table.
+pub async fn scan_image(
+    State(state): State<AppState>,
+    mut multipart: Multipart,
+) -> Result<Json<ScanImageResponse>, AppError> {
+    let claude = state
+        .claude
+        .as_ref()
+        .ok_or(AppError::ImageModerationNotConfigured)?;
+    let db = state
+        .db
+        .as_ref()
+        .ok_or(AppError::ImageModerationNotConfigured)?;
+
+    let mut image_bytes: Option<Vec<u8>> = None;
+    let mut image_id: Option<String> = None;
+    let mut media_type = "image/png".to_string();
+
+    // Parse multipart form
+    while let Some(field) = multipart
+        .next_field()
+        .await
+        .map_err(|e| AppError::BadRequest(format!("multipart error: {e}")))?
+    {
+        let name = field.name().unwrap_or_default().to_string();
+
+        match name.as_str() {
+            "image" => {
+                // Get content type from field
+                if let Some(ct) = field.content_type() {
+                    media_type = ct.to_string();
+                }
+                image_bytes = Some(
+                    field
+                        .bytes()
+                        .await
+                        .map_err(|e| AppError::BadRequest(format!("failed to read image: {e}")))?
+                        .to_vec(),
+                );
+            }
+            "image_id" => {
+                image_id = Some(
+                    field
+                        .text()
+                        .await
+                        .map_err(|e| AppError::BadRequest(format!("failed to read image_id: {e}")))?,
+                );
+            }
+            _ => {}
+        }
+    }
+
+    let image_bytes =
+        image_bytes.ok_or_else(|| AppError::BadRequest("missing 'image' field".to_string()))?;
+    let image_id =
+        image_id.ok_or_else(|| AppError::BadRequest("missing 'image_id' field".to_string()))?;
+
+    info!(image_id = %image_id, size = image_bytes.len(), "scanning image");
+
+    // Call Claude for analysis
+    let result = claude
+        .analyze_image(&image_bytes, &media_type)
+        .await
+        .map_err(|e| AppError::Claude(e.to_string()))?;
+
+    // Store scan result for cost tracking
+    db.store_image_scan(
+        &image_id,
+        result.is_safe,
+        &result.violated_categories,
+        &result.severity,
+        &result.explanation,
+        "claude-sonnet-4-5-20250514", // TODO: get from client
+    )
+    .await?;
+
+    // If not safe, add to sensitive images
+    if !result.is_safe {
+        info!(image_id = %image_id, severity = %result.severity, "flagging sensitive image");
+        db.add_sensitive_image(
+            Some(&image_id),
+            None,
+            Some(&result.explanation),
+            Some("claude-auto"),
+        )
+        .await?;
+    }
+
+    Ok(Json(ScanImageResponse {
+        is_safe: result.is_safe,
+        reason: if result.is_safe {
+            None
+        } else {
+            Some(result.explanation)
+        },
+        severity: result.severity,
+        violated_categories: result.violated_categories,
+    }))
 }
 
 #[cfg(test)]
```
### moderation/src/main.rs (+17)
```diff
 mod admin;
 mod audd;
 mod auth;
+mod claude;
 mod config;
 mod db;
 mod handlers;
···
         (None, None, None)
     };
 
+    // Initialize Claude client for image moderation if configured
+    let claude_client = if config.claude_enabled() {
+        let client = claude::ClaudeClient::new(
+            config.claude_api_key.clone().unwrap(),
+            Some(config.claude_model.clone()),
+        );
+        info!(model = %config.claude_model, "claude image moderation enabled");
+        Some(client)
+    } else {
+        warn!("claude not configured - /scan-image endpoint will return 503");
+        None
+    };
+
     let state = AppState {
         audd_api_token: config.audd_api_token,
         audd_api_url: config.audd_api_url,
         db: db.map(Arc::new),
         signer: signer.map(Arc::new),
         label_tx,
+        claude: claude_client.map(Arc::new),
     };
 
     let app = Router::new()
···
         .route("/sensitive-images", get(handlers::get_sensitive_images))
         // AuDD scanning
         .route("/scan", post(audd::scan))
+        // Image moderation via Claude
+        .route("/scan-image", post(handlers::scan_image))
         // Label emission (internal API)
         .route("/emit-label", post(handlers::emit_label))
         // Admin UI and API
```
### moderation/src/state.rs (+13)
```diff
 use tokio::sync::broadcast;
 use tracing::error;
 
+use crate::claude::ClaudeClient;
 use crate::db::LabelDb;
 use crate::labels::{Label, LabelError, LabelSigner};
 
···
     pub db: Option<Arc<LabelDb>>,
     pub signer: Option<Arc<LabelSigner>>,
     pub label_tx: Option<broadcast::Sender<(i64, Label)>>,
+    /// Claude client for image moderation (if configured)
+    pub claude: Option<Arc<ClaudeClient>>,
 }
 
 /// Application error type.
···
     #[error("audd error: {0}")]
     Audd(String),
 
+    #[error("claude error: {0}")]
+    Claude(String),
+
+    #[error("image moderation not configured")]
+    ImageModerationNotConfigured,
+
     #[error("labeler not configured")]
     LabelerNotConfigured,
 
···
         error!(error = %self, "request failed");
         let (status, error_type) = match &self {
             AppError::Audd(_) => (StatusCode::BAD_GATEWAY, "AuddError"),
+            AppError::Claude(_) => (StatusCode::BAD_GATEWAY, "ClaudeError"),
+            AppError::ImageModerationNotConfigured => {
+                (StatusCode::SERVICE_UNAVAILABLE, "ImageModerationNotConfigured")
+            }
             AppError::LabelerNotConfigured => {
                 (StatusCode::SERVICE_UNAVAILABLE, "LabelerNotConfigured")
             }
```