commit 0b965f3b6f603ffc58992b088f694d7ee9392509 · zzstoatzz.io/plyr.fm

+46 -7

backend/src/backend/_internal/moderation.py

··· 87 87 track_id: database ID of the track 88 88 result: scan result from moderation service 89 89 """ 90 + from sqlalchemy.orm import joinedload 91 + 90 92 async with db_session() as db: 91 93 is_flagged = result.get("is_flagged", False) 92 94 ··· 110 112 111 113 # emit ATProto label if flagged 112 114 if is_flagged: 113 - track = await db.scalar(select(Track).where(Track.id == track_id)) 115 + track = await db.scalar( 116 + select(Track) 117 + .options(joinedload(Track.artist)) 118 + .where(Track.id == track_id) 119 + ) 114 120 if track and track.atproto_record_uri: 115 121 await _emit_copyright_label( 116 122 uri=track.atproto_record_uri, 117 123 cid=track.atproto_record_cid, 124 + track_title=track.title, 125 + artist_handle=track.artist.handle if track.artist else None, 126 + artist_did=track.artist_did, 127 + highest_score=scan.highest_score, 128 + matches=scan.matches, 118 129 ) 119 130 120 131 121 - async def _emit_copyright_label(uri: str, cid: str | None) -> None: 132 + async def _emit_copyright_label( 133 + uri: str, 134 + cid: str | None, 135 + track_title: str | None = None, 136 + artist_handle: str | None = None, 137 + artist_did: str | None = None, 138 + highest_score: float | None = None, 139 + matches: list[dict[str, Any]] | None = None, 140 + ) -> None: 122 141 """emit a copyright-violation label to the ATProto labeler service. 123 142 124 143 this is fire-and-forget - failures are logged but don't affect the scan result. ··· 126 145 args: 127 146 uri: AT URI of the track record 128 147 cid: optional CID of the record 148 + track_title: title of the track (for admin UI context) 149 + artist_handle: handle of the artist (for admin UI context) 150 + artist_did: DID of the artist (for admin UI context) 151 + highest_score: highest match score (for admin UI context) 152 + matches: list of copyright matches (for admin UI context) 129 153 """ 130 154 try: 155 + # build context for admin UI display 156 + context: dict[str, Any] | None = None 157 + if track_title or artist_handle or matches: 158 + context = { 159 + "track_title": track_title, 160 + "artist_handle": artist_handle, 161 + "artist_did": artist_did, 162 + "highest_score": highest_score, 163 + "matches": matches, 164 + } 165 + 166 + payload: dict[str, Any] = { 167 + "uri": uri, 168 + "val": "copyright-violation", 169 + "cid": cid, 170 + } 171 + if context: 172 + payload["context"] = context 173 + 131 174 async with httpx.AsyncClient(timeout=httpx.Timeout(10.0)) as client: 132 175 response = await client.post( 133 176 f"{settings.moderation.labeler_url}/emit-label", 134 - json={ 135 - "uri": uri, 136 - "val": "copyright-violation", 137 - "cid": cid, 138 - }, 177 + json=payload, 139 178 headers={"X-Moderation-Key": settings.moderation.auth_token}, 140 179 ) 141 180 response.raise_for_status()

+13 -1

backend/tests/test_moderation.py

··· 154 154 ) as mock_emit: 155 155 await _store_scan_result(track.id, mock_moderation_response) 156 156 157 - # verify label emission was called 157 + # verify label emission was called with full context 158 158 mock_emit.assert_called_once_with( 159 159 uri="at://did:plc:labelertest/fm.plyr.track/abc123", 160 160 cid="bafyreiabc123", 161 + track_title="Labeler Test Track", 162 + artist_handle="labeler.bsky.social", 163 + artist_did="did:plc:labelertest", 164 + highest_score=85, 165 + matches=[ 166 + { 167 + "artist": "Test Artist", 168 + "title": "Test Song", 169 + "score": 85, 170 + "isrc": "USRC12345678", 171 + } 172 + ], 161 173 ) 162 174 163 175

+176 -47

moderation/src/admin.rs

··· 3 3 use axum::{extract::State, response::Html, Json}; 4 4 use serde::{Deserialize, Serialize}; 5 5 6 + use crate::db::LabelContext; 6 7 use crate::state::{AppError, AppState}; 7 8 8 9 /// A flagged track pending review. ··· 14 15 pub created_at: String, 15 16 /// If there's a negation label for this URI+val, it's been resolved. 16 17 pub resolved: bool, 18 + /// Optional context about the track (title, artist, matches). 19 + #[serde(skip_serializing_if = "Option::is_none")] 20 + pub context: Option<LabelContext>, 17 21 } 18 22 19 23 /// Response for listing flagged tracks. ··· 41 45 pub message: String, 42 46 } 43 47 48 + /// Request to store label context (for backfill). 49 + #[derive(Debug, Deserialize)] 50 + pub struct StoreContextRequest { 51 + pub uri: String, 52 + pub context: ContextPayload, 53 + } 54 + 55 + /// Context payload for storage. 56 + #[derive(Debug, Deserialize)] 57 + pub struct ContextPayload { 58 + pub track_title: Option<String>, 59 + pub artist_handle: Option<String>, 60 + pub artist_did: Option<String>, 61 + pub highest_score: Option<f64>, 62 + pub matches: Option<Vec<crate::db::CopyrightMatch>>, 63 + } 64 + 65 + /// Response after storing context. 66 + #[derive(Debug, Serialize)] 67 + pub struct StoreContextResponse { 68 + pub message: String, 69 + } 70 + 44 71 /// List all flagged tracks (copyright-violation labels without negations). 45 72 pub async fn list_flagged( 46 73 State(state): State<AppState>, ··· 79 106 })) 80 107 } 81 108 109 + /// Store context for a label (for backfill without re-emitting labels). 110 + pub async fn store_context( 111 + State(state): State<AppState>, 112 + Json(request): Json<StoreContextRequest>, 113 + ) -> Result<Json<StoreContextResponse>, AppError> { 114 + let db = state.db.as_ref().ok_or(AppError::LabelerNotConfigured)?; 115 + 116 + tracing::info!(uri = %request.uri, "storing label context"); 117 + 118 + let label_ctx = LabelContext { 119 + track_title: request.context.track_title, 120 + artist_handle: request.context.artist_handle, 121 + artist_did: request.context.artist_did, 122 + highest_score: request.context.highest_score, 123 + matches: request.context.matches, 124 + }; 125 + 126 + db.store_context(&request.uri, &label_ctx).await?; 127 + 128 + Ok(Json(StoreContextResponse { 129 + message: format!("context stored for {}", request.uri), 130 + })) 131 + } 132 + 82 133 /// Serve the admin UI HTML. 83 134 pub async fn admin_ui() -> Html<&'static str> { 84 135 Html(ADMIN_HTML) ··· 96 147 font-family: system-ui, -apple-system, sans-serif; 97 148 background: #0a0a0a; 98 149 color: #e5e5e5; 99 - max-width: 900px; 150 + max-width: 1100px; 100 151 margin: 0 auto; 101 152 padding: 20px; 102 153 line-height: 1.6; ··· 136 187 } 137 188 .status.error { display: block; background: rgba(239, 68, 68, 0.2); color: #ef4444; } 138 189 .status.success { display: block; background: rgba(34, 197, 94, 0.2); color: #22c55e; } 139 - table { 140 - width: 100%; 141 - border-collapse: collapse; 190 + .flags-list { 191 + display: flex; 192 + flex-direction: column; 193 + gap: 12px; 142 194 } 143 - th, td { 144 - text-align: left; 145 - padding: 12px; 146 - border-bottom: 1px solid #222; 195 + .flag-card { 196 + background: #111; 197 + border: 1px solid #222; 198 + border-radius: 8px; 199 + padding: 16px; 147 200 } 148 - th { color: #888; font-weight: 500; } 149 - .uri { 201 + .flag-card.resolved { opacity: 0.6; } 202 + .flag-header { 203 + display: flex; 204 + justify-content: space-between; 205 + align-items: flex-start; 206 + margin-bottom: 12px; 207 + } 208 + .track-info h3 { 209 + margin: 0 0 4px 0; 210 + color: #fff; 211 + font-size: 1.1em; 212 + } 213 + .track-info .artist { 214 + color: #888; 215 + font-size: 0.9em; 216 + } 217 + .track-info .uri { 150 218 font-family: monospace; 151 - font-size: 0.85em; 219 + font-size: 0.75em; 220 + color: #666; 152 221 word-break: break-all; 222 + margin-top: 4px; 223 + } 224 + .flag-badges { 225 + display: flex; 226 + gap: 8px; 227 + align-items: center; 153 228 } 154 229 .badge { 155 230 display: inline-block; ··· 159 234 } 160 235 .badge.pending { background: rgba(234, 179, 8, 0.2); color: #eab308; } 161 236 .badge.resolved { background: rgba(34, 197, 94, 0.2); color: #22c55e; } 237 + .badge.score { background: rgba(239, 68, 68, 0.2); color: #ef4444; } 238 + .matches { 239 + background: #0a0a0a; 240 + border-radius: 4px; 241 + padding: 12px; 242 + margin-top: 12px; 243 + } 244 + .matches h4 { 245 + margin: 0 0 8px 0; 246 + color: #888; 247 + font-size: 0.85em; 248 + font-weight: 500; 249 + } 250 + .match-item { 251 + display: flex; 252 + justify-content: space-between; 253 + padding: 6px 0; 254 + border-bottom: 1px solid #1a1a1a; 255 + font-size: 0.9em; 256 + } 257 + .match-item:last-child { border-bottom: none; } 258 + .match-item .title { color: #e5e5e5; } 259 + .match-item .artist { color: #888; } 260 + .match-item .score { 261 + color: #ef4444; 262 + font-family: monospace; 263 + } 264 + .flag-actions { 265 + display: flex; 266 + justify-content: flex-end; 267 + margin-top: 12px; 268 + padding-top: 12px; 269 + border-top: 1px solid #222; 270 + } 162 271 .resolve-btn { 163 272 background: #f59e0b; 164 273 color: #000; 165 274 border: none; 166 - padding: 6px 12px; 275 + padding: 8px 16px; 167 276 border-radius: 4px; 168 277 cursor: pointer; 169 278 font-size: 0.85em; 279 + font-weight: 500; 170 280 } 171 281 .resolve-btn:hover { background: #d97706; } 172 - .resolve-btn:disabled { background: #444; color: #888; cursor: not-allowed; } 282 + .resolve-btn:disabled { background: #333; color: #666; cursor: not-allowed; } 173 283 .empty { color: #666; text-align: center; padding: 40px; } 174 - .loading { color: #888; } 284 + .loading { color: #888; text-align: center; padding: 40px; } 175 285 .refresh-btn { 176 286 background: #333; 177 287 color: #fff; ··· 188 298 margin-bottom: 16px; 189 299 } 190 300 .header-row h2 { margin: 0; } 301 + .no-context { color: #666; font-style: italic; font-size: 0.9em; } 191 302 </style> 192 303 </head> 193 304 <body> ··· 206 317 <h2>flagged tracks</h2> 207 318 <button class="refresh-btn" onclick="loadFlags()">refresh</button> 208 319 </div> 209 - <table> 210 - <thead> 211 - <tr> 212 - <th>seq</th> 213 - <th>uri</th> 214 - <th>label</th> 215 - <th>status</th> 216 - <th>action</th> 217 - </tr> 218 - </thead> 219 - <tbody id="flags-table"> 220 - <tr><td colspan="5" class="loading">loading...</td></tr> 221 - </tbody> 222 - </table> 320 + <div id="flags-list" class="flags-list"> 321 + <div class="loading">loading...</div> 322 + </div> 223 323 </div> 224 324 225 325 <script> ··· 249 349 } 250 350 251 351 async function loadFlags() { 252 - const tbody = document.getElementById('flags-table'); 253 - tbody.innerHTML = '<tr><td colspan="5" class="loading">loading...</td></tr>'; 352 + const container = document.getElementById('flags-list'); 353 + container.innerHTML = '<div class="loading">loading...</div>'; 254 354 255 355 try { 256 356 const res = await fetch('/admin/flags', { ··· 269 369 const data = await res.json(); 270 370 271 371 if (data.tracks.length === 0) { 272 - tbody.innerHTML = '<tr><td colspan="5" class="empty">no flagged tracks</td></tr>'; 372 + container.innerHTML = '<div class="empty">no flagged tracks</div>'; 273 373 return; 274 374 } 275 375 276 - tbody.innerHTML = data.tracks.map(track => ` 277 - <tr> 278 - <td>${track.seq}</td> 279 - <td class="uri">${escapeHtml(track.uri)}</td> 280 - <td><span class="badge pending">${escapeHtml(track.val)}</span></td> 281 - <td><span class="badge ${track.resolved ? 'resolved' : 'pending'}">${track.resolved ? 'resolved' : 'pending'}</span></td> 282 - <td> 283 - <button class="resolve-btn" 284 - onclick="resolveFlag('${escapeHtml(track.uri)}', '${escapeHtml(track.val)}')" 285 - ${track.resolved ? 'disabled' : ''}> 286 - ${track.resolved ? 'resolved' : 'mark false positive'} 287 - </button> 288 - </td> 289 - </tr> 290 - `).join(''); 376 + container.innerHTML = data.tracks.map(track => { 377 + const ctx = track.context || {}; 378 + const hasContext = ctx.track_title || ctx.artist_handle; 379 + const matches = ctx.matches || []; 380 + 381 + return ` 382 + <div class="flag-card ${track.resolved ? 'resolved' : ''}"> 383 + <div class="flag-header"> 384 + <div class="track-info"> 385 + ${hasContext ? ` 386 + <h3>${escapeHtml(ctx.track_title || 'unknown track')}</h3> 387 + <div class="artist">by @${escapeHtml(ctx.artist_handle || 'unknown')}</div> 388 + ` : ` 389 + <div class="no-context">no track info available</div> 390 + `} 391 + <div class="uri">${escapeHtml(track.uri)}</div> 392 + </div> 393 + <div class="flag-badges"> 394 + ${ctx.highest_score ? `<span class="badge score">${(ctx.highest_score * 100).toFixed(0)}% match</span>` : ''} 395 + <span class="badge ${track.resolved ? 'resolved' : 'pending'}">${track.resolved ? 'resolved' : 'pending'}</span> 396 + </div> 397 + </div> 398 + ${matches.length > 0 ? ` 399 + <div class="matches"> 400 + <h4>potential matches</h4> 401 + ${matches.slice(0, 3).map(m => ` 402 + <div class="match-item"> 403 + <span><span class="title">${escapeHtml(m.title)}</span> <span class="artist">by ${escapeHtml(m.artist)}</span></span> 404 + <span class="score">${(m.score * 100).toFixed(0)}%</span> 405 + </div> 406 + `).join('')} 407 + </div> 408 + ` : ''} 409 + <div class="flag-actions"> 410 + <button class="resolve-btn" 411 + onclick="resolveFlag('${escapeHtml(track.uri)}', '${escapeHtml(track.val)}')" 412 + ${track.resolved ? 'disabled' : ''}> 413 + ${track.resolved ? 'resolved' : 'mark false positive'} 414 + </button> 415 + </div> 416 + </div> 417 + `; 418 + }).join(''); 291 419 292 420 } catch (err) { 293 - tbody.innerHTML = `<tr><td colspan="5" class="empty">error: ${err.message}</td></tr>`; 421 + container.innerHTML = `<div class="empty">error: ${err.message}</div>`; 294 422 } 295 423 } 296 424 ··· 319 447 } 320 448 321 449 function escapeHtml(str) { 450 + if (!str) return ''; 322 451 return str.replace(/&/g, '&') 323 452 .replace(/</g, '<') 324 453 .replace(/>/g, '>')

+161 -31

moderation/src/db.rs

··· 1 1 //! Database operations for the labeler. 2 2 3 3 use chrono::{DateTime, Utc}; 4 + use serde::{Deserialize, Serialize}; 4 5 use sqlx::{postgres::PgPoolOptions, PgPool}; 5 6 6 7 use crate::admin::FlaggedTrack; 7 8 use crate::labels::Label; 9 + 10 + /// Type alias for context row from database query. 11 + type ContextRow = ( 12 + Option<String>, 13 + Option<String>, 14 + Option<String>, 15 + Option<f64>, 16 + Option<serde_json::Value>, 17 + ); 18 + 19 + /// Type alias for flagged track row from database query. 20 + type FlaggedRow = ( 21 + i64, 22 + String, 23 + String, 24 + DateTime<Utc>, 25 + Option<String>, 26 + Option<String>, 27 + Option<String>, 28 + Option<f64>, 29 + Option<serde_json::Value>, 30 + ); 31 + 32 + /// Copyright match info stored alongside labels. 33 + #[derive(Debug, Clone, Serialize, Deserialize)] 34 + pub struct CopyrightMatch { 35 + pub title: String, 36 + pub artist: String, 37 + pub score: f64, 38 + } 39 + 40 + /// Context stored alongside a label for display in admin UI. 41 + #[derive(Debug, Clone, Serialize, Deserialize, Default)] 42 + pub struct LabelContext { 43 + pub track_title: Option<String>, 44 + pub artist_handle: Option<String>, 45 + pub artist_did: Option<String>, 46 + pub highest_score: Option<f64>, 47 + pub matches: Option<Vec<CopyrightMatch>>, 48 + } 8 49 9 50 /// Database connection pool and operations. 10 51 #[derive(Clone)] ··· 71 112 .execute(&self.pool) 72 113 .await?; 73 114 115 + // Label context table for admin UI display 116 + sqlx::query( 117 + r#" 118 + CREATE TABLE IF NOT EXISTS label_context ( 119 + id BIGSERIAL PRIMARY KEY, 120 + uri TEXT NOT NULL UNIQUE, 121 + track_title TEXT, 122 + artist_handle TEXT, 123 + artist_did TEXT, 124 + highest_score DOUBLE PRECISION, 125 + matches JSONB, 126 + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() 127 + ) 128 + "#, 129 + ) 130 + .execute(&self.pool) 131 + .await?; 132 + 133 + sqlx::query("CREATE INDEX IF NOT EXISTS idx_label_context_uri ON label_context(uri)") 134 + .execute(&self.pool) 135 + .await?; 136 + 74 137 Ok(()) 75 138 } 76 139 140 + /// Store or update label context for a URI. 141 + pub async fn store_context(&self, uri: &str, context: &LabelContext) -> Result<(), sqlx::Error> { 142 + let matches_json = context 143 + .matches 144 + .as_ref() 145 + .map(|m| serde_json::to_value(m).unwrap_or_default()); 146 + 147 + sqlx::query( 148 + r#" 149 + INSERT INTO label_context (uri, track_title, artist_handle, artist_did, highest_score, matches) 150 + VALUES ($1, $2, $3, $4, $5, $6) 151 + ON CONFLICT (uri) DO UPDATE SET 152 + track_title = EXCLUDED.track_title, 153 + artist_handle = EXCLUDED.artist_handle, 154 + artist_did = EXCLUDED.artist_did, 155 + highest_score = EXCLUDED.highest_score, 156 + matches = EXCLUDED.matches 157 + "#, 158 + ) 159 + .bind(uri) 160 + .bind(&context.track_title) 161 + .bind(&context.artist_handle) 162 + .bind(&context.artist_did) 163 + .bind(context.highest_score) 164 + .bind(matches_json) 165 + .execute(&self.pool) 166 + .await?; 167 + 168 + Ok(()) 169 + } 170 + 171 + /// Get label context for a URI. 172 + pub async fn get_context(&self, uri: &str) -> Result<Option<LabelContext>, sqlx::Error> { 173 + let row: Option<ContextRow> = sqlx::query_as( 174 + r#" 175 + SELECT track_title, artist_handle, artist_did, highest_score, matches 176 + FROM label_context 177 + WHERE uri = $1 178 + "#, 179 + ) 180 + .bind(uri) 181 + .fetch_optional(&self.pool) 182 + .await?; 183 + 184 + Ok(row.map(|(track_title, artist_handle, artist_did, highest_score, matches)| { 185 + LabelContext { 186 + track_title, 187 + artist_handle, 188 + artist_did, 189 + highest_score, 190 + matches: matches.and_then(|v| serde_json::from_value(v).ok()), 191 + } 192 + })) 193 + } 194 + 77 195 /// Store a signed label and return its sequence number. 78 196 pub async fn store_label(&self, label: &Label) -> Result<i64, sqlx::Error> { 79 197 let sig = label.sig.as_ref().map(|b| b.to_vec()).unwrap_or_default(); ··· 234 352 .map(|s| s.unwrap_or(0)) 235 353 } 236 354 237 - /// Get all copyright-violation labels with their resolution status. 355 + /// Get all copyright-violation labels with their resolution status and context. 238 356 /// 239 357 /// A label is resolved if there's a negation label for the same uri+val. 240 358 pub async fn get_pending_flags(&self) -> Result<Vec<FlaggedTrack>, sqlx::Error> { 241 - // Get all copyright-violation labels (non-negations) 242 - let rows = sqlx::query_as::<_, LabelRow>( 359 + // Get all copyright-violation labels with context via LEFT JOIN 360 + let rows: Vec<FlaggedRow> = sqlx::query_as( 361 + r#" 362 + SELECT l.seq, l.uri, l.val, l.cts, 363 + c.track_title, c.artist_handle, c.artist_did, c.highest_score, c.matches 364 + FROM labels l 365 + LEFT JOIN label_context c ON l.uri = c.uri 366 + WHERE l.val = 'copyright-violation' AND l.neg = false 367 + ORDER BY l.seq DESC 368 + "#, 369 + ) 370 + .fetch_all(&self.pool) 371 + .await?; 372 + 373 + // Get all negation labels 374 + let negated_uris: std::collections::HashSet<String> = sqlx::query_scalar::<_, String>( 243 375 r#" 244 - SELECT seq, src, uri, cid, val, neg, cts, exp, sig 376 + SELECT DISTINCT uri 245 377 FROM labels 246 - WHERE val = 'copyright-violation' AND neg = false 247 - ORDER BY seq DESC 378 + WHERE val = 'copyright-violation' AND neg = true 248 379 "#, 249 380 ) 250 381 .fetch_all(&self.pool) 251 - .await?; 252 - 253 - // Get all negation labels for these URIs 254 - let uris: Vec<String> = rows.iter().map(|r| r.uri.clone()).collect(); 255 - let negated_uris: std::collections::HashSet<String> = if !uris.is_empty() { 256 - sqlx::query_scalar::<_, String>( 257 - r#" 258 - SELECT DISTINCT uri 259 - FROM labels 260 - WHERE val = 'copyright-violation' AND neg = true 261 - "#, 262 - ) 263 - .fetch_all(&self.pool) 264 - .await? 265 - .into_iter() 266 - .collect() 267 - } else { 268 - std::collections::HashSet::new() 269 - }; 382 + .await? 383 + .into_iter() 384 + .collect(); 270 385 271 386 let tracks = rows 272 387 .into_iter() 273 - .map(|r| FlaggedTrack { 274 - seq: r.seq, 275 - uri: r.uri.clone(), 276 - val: r.val, 277 - created_at: r.cts.format("%Y-%m-%d %H:%M:%S").to_string(), 278 - resolved: negated_uris.contains(&r.uri), 388 + .map(|(seq, uri, val, cts, track_title, artist_handle, artist_did, highest_score, matches)| { 389 + let context = if track_title.is_some() || artist_handle.is_some() { 390 + Some(LabelContext { 391 + track_title, 392 + artist_handle, 393 + artist_did, 394 + highest_score, 395 + matches: matches.and_then(|v| serde_json::from_value(v).ok()), 396 + }) 397 + } else { 398 + None 399 + }; 400 + 401 + FlaggedTrack { 402 + seq, 403 + uri: uri.clone(), 404 + val, 405 + created_at: cts.format("%Y-%m-%d %H:%M:%S").to_string(), 406 + resolved: negated_uris.contains(&uri), 407 + context, 408 + } 279 409 }) 280 410 .collect(); 281 411

+28

moderation/src/handlers.rs

··· 4 4 use serde::{Deserialize, Serialize}; 5 5 use tracing::info; 6 6 7 + use crate::db::{CopyrightMatch, LabelContext}; 7 8 use crate::labels::Label; 8 9 use crate::state::{AppError, AppState}; 9 10 ··· 15 16 pub labeler_enabled: bool, 16 17 } 17 18 19 + /// Context info for display in admin UI. 20 + #[derive(Debug, Deserialize)] 21 + pub struct EmitLabelContext { 22 + pub track_title: Option<String>, 23 + pub artist_handle: Option<String>, 24 + pub artist_did: Option<String>, 25 + pub highest_score: Option<f64>, 26 + pub matches: Option<Vec<CopyrightMatch>>, 27 + } 28 + 18 29 #[derive(Debug, Deserialize)] 19 30 pub struct EmitLabelRequest { 20 31 /// AT URI of the resource to label (e.g., at://did:plc:xxx/fm.plyr.track/abc123) ··· 27 38 /// If true, negate an existing label 28 39 #[serde(default)] 29 40 pub neg: bool, 41 + /// Optional context for admin UI display 42 + pub context: Option<EmitLabelContext>, 30 43 } 31 44 32 45 fn default_label_val() -> String { ··· 146 159 // Store in database 147 160 let seq = db.store_label(&label).await?; 148 161 info!(seq, uri = %request.uri, "label stored"); 162 + 163 + // Store context if provided (for admin UI) 164 + if let Some(ctx) = request.context { 165 + let label_ctx = LabelContext { 166 + track_title: ctx.track_title, 167 + artist_handle: ctx.artist_handle, 168 + artist_did: ctx.artist_did, 169 + highest_score: ctx.highest_score, 170 + matches: ctx.matches, 171 + }; 172 + if let Err(e) = db.store_context(&request.uri, &label_ctx).await { 173 + // Log but don't fail - context is supplementary 174 + tracing::warn!(uri = %request.uri, error = %e, "failed to store label context"); 175 + } 176 + } 149 177 150 178 // Broadcast to subscribers 151 179 if let Some(tx) = &state.label_tx {

+1

moderation/src/main.rs

··· 79 79 .route("/admin", get(admin::admin_ui)) 80 80 .route("/admin/flags", get(admin::list_flagged)) 81 81 .route("/admin/resolve", post(admin::resolve_flag)) 82 + .route("/admin/context", post(admin::store_context)) 82 83 // ATProto XRPC endpoints (public) 83 84 .route("/xrpc/com.atproto.label.queryLabels", get(xrpc::query_labels)) 84 85 .route(

+234

scripts/backfill_copyright_labels.py

··· 1 + #!/usr/bin/env -S uv run --script --quiet --with-editable=backend 2 + # /// script 3 + # requires-python = ">=3.12" 4 + # dependencies = [ 5 + # "httpx", 6 + # "pydantic-settings", 7 + # ] 8 + # /// 9 + """backfill copyright labels for flagged tracks. 10 + 11 + usage: 12 + uv run scripts/backfill_copyright_labels.py --env prod --dry-run 13 + uv run scripts/backfill_copyright_labels.py --env prod 14 + 15 + this will: 16 + - fetch all tracks flagged in copyright_scans that have atproto_record_uri 17 + - emit labels to the moderation service for each flagged track 18 + 19 + environment variables (set in .env or export): 20 + PROD_DATABASE_URL - production database connection string 21 + STAGING_DATABASE_URL - staging database connection string 22 + MODERATION_SERVICE_URL - URL of moderation service (default: https://moderation.plyr.fm) 23 + MODERATION_AUTH_TOKEN - auth token for moderation service 24 + """ 25 + 26 + import asyncio 27 + import os 28 + import sys 29 + from typing import Literal 30 + 31 + import httpx 32 + from pydantic import Field 33 + from pydantic_settings import BaseSettings, SettingsConfigDict 34 + 35 + 36 + Environment = Literal["dev", "staging", "prod"] 37 + 38 + 39 + class BackfillSettings(BaseSettings): 40 + """settings for backfill script.""" 41 + 42 + model_config = SettingsConfigDict( 43 + env_file=".env", 44 + case_sensitive=False, 45 + extra="ignore", 46 + ) 47 + 48 + dev_database_url: str = Field(default="", validation_alias="DEV_DATABASE_URL") 49 + staging_database_url: str = Field( 50 + default="", validation_alias="STAGING_DATABASE_URL" 51 + ) 52 + prod_database_url: str = Field(default="", validation_alias="PROD_DATABASE_URL") 53 + 54 + moderation_service_url: str = Field( 55 + default="https://moderation.plyr.fm", 56 + validation_alias="MODERATION_SERVICE_URL", 57 + ) 58 + moderation_auth_token: str = Field( 59 + default="", validation_alias="MODERATION_AUTH_TOKEN" 60 + ) 61 + 62 + def get_database_url(self, env: Environment) -> str: 63 + """get database URL for environment.""" 64 + urls = { 65 + "dev": self.dev_database_url, 66 + "staging": self.staging_database_url, 67 + "prod": self.prod_database_url, 68 + } 69 + url = urls.get(env, "") 70 + if not url: 71 + raise ValueError(f"no database URL configured for {env}") 72 + return url 73 + 74 + 75 + def setup_env(settings: BackfillSettings, env: Environment) -> None: 76 + """setup environment variables for backend imports.""" 77 + db_url = settings.get_database_url(env) 78 + # ensure asyncpg driver is used 79 + if db_url.startswith("postgresql://"): 80 + db_url = db_url.replace("postgresql://", "postgresql+asyncpg://", 1) 81 + # asyncpg uses 'ssl' not 'sslmode' - convert the parameter 82 + db_url = db_url.replace("sslmode=require", "ssl=require") 83 + os.environ["DATABASE_URL"] = db_url 84 + 85 + 86 + async def emit_label( 87 + client: httpx.AsyncClient, 88 + settings: BackfillSettings, 89 + uri: str, 90 + cid: str | None, 91 + ) -> bool: 92 + """emit a copyright-violation label for a track.""" 93 + try: 94 + response = await client.post( 95 + f"{settings.moderation_service_url}/emit-label", 96 + json={ 97 + "uri": uri, 98 + "val": "copyright-violation", 99 + "cid": cid, 100 + }, 101 + headers={"X-Moderation-Key": settings.moderation_auth_token}, 102 + timeout=30.0, 103 + ) 104 + response.raise_for_status() 105 + return True 106 + except httpx.HTTPStatusError as e: 107 + print(f" ❌ HTTP error: {e.response.status_code}") 108 + try: 109 + print(f" {e.response.json()}") 110 + except Exception: 111 + print(f" {e.response.text[:200]}") 112 + return False 113 + except Exception as e: 114 + print(f" ❌ error: {e}") 115 + return False 116 + 117 + 118 + async def run_backfill(env: Environment, dry_run: bool = False) -> None: 119 + """backfill copyright labels for flagged tracks.""" 120 + settings = BackfillSettings() 121 + 122 + # validate settings 123 + try: 124 + db_url = settings.get_database_url(env) 125 + print( 126 + f"✓ database: {db_url.split('@')[1].split('/')[0] if '@' in db_url else 'configured'}" 127 + ) 128 + except ValueError as e: 129 + print(f"❌ {e}") 130 + print(f"\nset {env.upper()}_DATABASE_URL in .env") 131 + sys.exit(1) 132 + 133 + if not settings.moderation_auth_token: 134 + print("❌ MODERATION_AUTH_TOKEN not set") 135 + sys.exit(1) 136 + 137 + print(f"✓ moderation service: {settings.moderation_service_url}") 138 + 139 + # setup env before backend imports 140 + setup_env(settings, env) 141 + 142 + # import backend after env setup 143 + from sqlalchemy import select 144 + from sqlalchemy.orm import joinedload 145 + 146 + from backend.models import CopyrightScan, Track 147 + from backend.utilities.database import db_session 148 + 149 + async with db_session() as db: 150 + # find flagged tracks with atproto URIs 151 + stmt = ( 152 + select(Track) 153 + .options(joinedload(Track.artist)) 154 + .join(CopyrightScan, CopyrightScan.track_id == Track.id) 155 + .where(CopyrightScan.is_flagged.is_(True)) 156 + .where(Track.atproto_record_uri.isnot(None)) 157 + .order_by(Track.created_at.desc()) 158 + ) 159 + 160 + result = await db.execute(stmt) 161 + tracks = result.scalars().unique().all() 162 + 163 + if not tracks: 164 + print("\n✅ no flagged tracks need label backfill") 165 + return 166 + 167 + print(f"\n📋 found {len(tracks)} flagged tracks with ATProto URIs") 168 + 169 + if dry_run: 170 + print("\n[DRY RUN] would emit labels for:") 171 + for track in tracks: 172 + print(f" - {track.id}: {track.title} by @{track.artist.handle}") 173 + print(f" uri: {track.atproto_record_uri}") 174 + return 175 + 176 + # emit labels 177 + async with httpx.AsyncClient() as client: 178 + emitted = 0 179 + failed = 0 180 + 181 + for i, track in enumerate(tracks, 1): 182 + print(f"\n[{i}/{len(tracks)}] emitting label for: {track.title}") 183 + print(f" artist: @{track.artist.handle}") 184 + print(f" uri: {track.atproto_record_uri}") 185 + 186 + success = await emit_label( 187 + client, 188 + settings, 189 + track.atproto_record_uri, 190 + track.atproto_record_cid, 191 + ) 192 + 193 + if success: 194 + emitted += 1 195 + print(" ✓ label emitted") 196 + else: 197 + failed += 1 198 + 199 + print(f"\n{'=' * 50}") 200 + print("✅ backfill complete") 201 + print(f" emitted: {emitted}") 202 + print(f" failed: {failed}") 203 + 204 + 205 + def main() -> None: 206 + """main entry point.""" 207 + import argparse 208 + 209 + parser = argparse.ArgumentParser( 210 + description="backfill copyright labels for flagged tracks" 211 + ) 212 + parser.add_argument( 213 + "--env", 214 + type=str, 215 + required=True, 216 + choices=["dev", "staging", "prod"], 217 + help="environment to backfill", 218 + ) 219 + parser.add_argument( 220 + "--dry-run", 221 + action="store_true", 222 + help="show what would be emitted without making changes", 223 + ) 224 + 225 + args = parser.parse_args() 226 + 227 + print(f"🏷️ copyright label backfill - {args.env}") 228 + print("=" * 50) 229 + 230 + asyncio.run(run_backfill(args.env, args.dry_run)) 231 + 232 + 233 + if __name__ == "__main__": 234 + main()

+265

scripts/backfill_label_context.py

··· 1 + #!/usr/bin/env -S uv run --script --quiet --with-editable=backend 2 + # /// script 3 + # requires-python = ">=3.12" 4 + # dependencies = [ 5 + # "httpx", 6 + # "pydantic-settings", 7 + # ] 8 + # /// 9 + """backfill label context from copyright_scans to moderation service. 10 + 11 + this script reads flagged tracks from the backend database and populates 12 + the label_context table in the moderation service database. it does NOT 13 + emit new labels - it only adds context to existing labels. 14 + 15 + usage: 16 + uv run scripts/backfill_label_context.py --env prod --dry-run 17 + uv run scripts/backfill_label_context.py --env prod 18 + 19 + environment variables (set in .env or export): 20 + PROD_DATABASE_URL - production database connection string 21 + STAGING_DATABASE_URL - staging database connection string 22 + MODERATION_SERVICE_URL - URL of moderation service (default: https://moderation.plyr.fm) 23 + MODERATION_AUTH_TOKEN - auth token for moderation service 24 + """ 25 + 26 + import asyncio 27 + import os 28 + import sys 29 + from typing import Any, Literal 30 + 31 + import httpx 32 + from pydantic import Field 33 + from pydantic_settings import BaseSettings, SettingsConfigDict 34 + 35 + 36 + Environment = Literal["dev", "staging", "prod"] 37 + 38 + 39 + class BackfillSettings(BaseSettings): 40 + """settings for backfill script.""" 41 + 42 + model_config = SettingsConfigDict( 43 + env_file=".env", 44 + case_sensitive=False, 45 + extra="ignore", 46 + ) 47 + 48 + dev_database_url: str = Field(default="", validation_alias="DEV_DATABASE_URL") 49 + staging_database_url: str = Field( 50 + default="", validation_alias="STAGING_DATABASE_URL" 51 + ) 52 + prod_database_url: str = Field(default="", validation_alias="PROD_DATABASE_URL") 53 + 54 + moderation_service_url: str = Field( 55 + default="https://moderation.plyr.fm", 56 + validation_alias="MODERATION_SERVICE_URL", 57 + ) 58 + moderation_auth_token: str = Field( 59 + default="", validation_alias="MODERATION_AUTH_TOKEN" 60 + ) 61 + 62 + def get_database_url(self, env: Environment) -> str: 63 + """get database URL for environment.""" 64 + urls = { 65 + "dev": self.dev_database_url, 66 + "staging": self.staging_database_url, 67 + "prod": self.prod_database_url, 68 + } 69 + url = urls.get(env, "") 70 + if not url: 71 + raise ValueError(f"no database URL configured for {env}") 72 + return url 73 + 74 + 75 + def setup_env(settings: BackfillSettings, env: Environment) -> None: 76 + """setup environment variables for backend imports.""" 77 + db_url = settings.get_database_url(env) 78 + # ensure asyncpg driver is used 79 + if db_url.startswith("postgresql://"): 80 + db_url = db_url.replace("postgresql://", "postgresql+asyncpg://", 1) 81 + # asyncpg uses 'ssl' not 'sslmode' - convert the parameter 82 + db_url = db_url.replace("sslmode=require", "ssl=require") 83 + os.environ["DATABASE_URL"] = db_url 84 + 85 + 86 + async def store_context( 87 + client: httpx.AsyncClient, 88 + settings: BackfillSettings, 89 + uri: str, 90 + context: dict[str, Any], 91 + ) -> bool: 92 + """store context directly via emit-label endpoint. 93 + 94 + we send a "dummy" emit that just stores context for an existing label. 95 + the moderation service will upsert the context without creating a new label 96 + if we use neg=false and the label already exists (it just updates context). 97 + 98 + actually, we need a dedicated endpoint for this. let's use a workaround: 99 + call emit-label with the context - it will store the context even though 100 + the label already exists (store_context uses ON CONFLICT DO UPDATE). 101 + """ 102 + try: 103 + # we need to call emit-label to trigger context storage 104 + # but we don't want to create duplicate labels 105 + # the backend will reject duplicate labels, so we just send context 106 + # via a new endpoint we need to add... or we can use a hack: 107 + # just POST to emit-label with context - it will store label + context 108 + # but since label already exists, we'll get an error... hmm 109 + 110 + # actually, looking at the code, store_label will create a new label row 111 + # each time (no unique constraint on uri+val). that's intentional for 112 + # labeler protocol. so we can't use emit-label for backfill. 113 + 114 + # we need a dedicated endpoint. let's add /admin/context for this. 115 + response = await client.post( 116 + f"{settings.moderation_service_url}/admin/context", 117 + json={ 118 + "uri": uri, 119 + "context": context, 120 + }, 121 + headers={"X-Moderation-Key": settings.moderation_auth_token}, 122 + timeout=30.0, 123 + ) 124 + response.raise_for_status() 125 + return True 126 + except httpx.HTTPStatusError as e: 127 + print(f" ❌ HTTP error: {e.response.status_code}") 128 + try: 129 + print(f" {e.response.json()}") 130 + except Exception: 131 + print(f" {e.response.text[:200]}") 132 + return False 133 + except Exception as e: 134 + print(f" ❌ error: {e}") 135 + return False 136 + 137 + 138 + async def run_backfill(env: Environment, dry_run: bool = False) -> None: 139 + """backfill label context from copyright_scans.""" 140 + settings = BackfillSettings() 141 + 142 + # validate settings 143 + try: 144 + db_url = settings.get_database_url(env) 145 + print( 146 + f"✓ database: {db_url.split('@')[1].split('/')[0] if '@' in db_url else 'configured'}" 147 + ) 148 + except ValueError as e: 149 + print(f"❌ {e}") 150 + print(f"\nset {env.upper()}_DATABASE_URL in .env") 151 + sys.exit(1) 152 + 153 + if not settings.moderation_auth_token: 154 + print("❌ MODERATION_AUTH_TOKEN not set") 155 + sys.exit(1) 156 + 157 + print(f"✓ moderation service: {settings.moderation_service_url}") 158 + 159 + # setup env before backend imports 160 + setup_env(settings, env) 161 + 162 + # import backend after env setup 163 + from sqlalchemy import select 164 + from sqlalchemy.orm import joinedload 165 + 166 + from backend.models import CopyrightScan, Track 167 + from backend.utilities.database import db_session 168 + 169 + async with db_session() as db: 170 + # find flagged tracks with atproto URIs and their scan results 171 + stmt = ( 172 + select(Track, CopyrightScan) 173 + .options(joinedload(Track.artist)) 174 + .join(CopyrightScan, CopyrightScan.track_id == Track.id) 175 + .where(CopyrightScan.is_flagged.is_(True)) 176 + .where(Track.atproto_record_uri.isnot(None)) 177 + .order_by(Track.created_at.desc()) 178 + ) 179 + 180 + result = await db.execute(stmt) 181 + rows = result.unique().all() 182 + 183 + if not rows: 184 + print("\n✅ no flagged tracks to backfill context for") 185 + return 186 + 187 + print(f"\n📋 found {len(rows)} flagged tracks with context to backfill") 188 + 189 + if dry_run: 190 + print("\n[DRY RUN] would store context for:") 191 + for track, scan in rows: 192 + print(f" - {track.id}: {track.title} by @{track.artist.handle}") 193 + print(f" uri: {track.atproto_record_uri}") 194 + print( 195 + f" score: {scan.highest_score}, matches: {len(scan.matches or [])}" 196 + ) 197 + return 198 + 199 + # store context for each track 200 + async with httpx.AsyncClient() as client: 201 + stored = 0 202 + failed = 0 203 + 204 + for i, (track, scan) in enumerate(rows, 1): 205 + print(f"\n[{i}/{len(rows)}] storing context for: {track.title}") 206 + print(f" artist: @{track.artist.handle}") 207 + print(f" uri: {track.atproto_record_uri}") 208 + 209 + context = { 210 + "track_title": track.title, 211 + "artist_handle": track.artist.handle if track.artist else None, 212 + "artist_did": track.artist_did, 213 + "highest_score": scan.highest_score, 214 + "matches": scan.matches, 215 + } 216 + 217 + success = await store_context( 218 + client, 219 + settings, 220 + track.atproto_record_uri, 221 + context, 222 + ) 223 + 224 + if success: 225 + stored += 1 226 + print(" ✓ context stored") 227 + else: 228 + failed += 1 229 + 230 + print(f"\n{'=' * 50}") 231 + print("✅ backfill complete") 232 + print(f" stored: {stored}") 233 + print(f" failed: {failed}") 234 + 235 + 236 + def main() -> None: 237 + """main entry point.""" 238 + import argparse 239 + 240 + parser = argparse.ArgumentParser( 241 + description="backfill label context from copyright_scans" 242 + ) 243 + parser.add_argument( 244 + "--env", 245 + type=str, 246 + required=True, 247 + choices=["dev", "staging", "prod"], 248 + help="environment to backfill", 249 + ) 250 + parser.add_argument( 251 + "--dry-run", 252 + action="store_true", 253 + help="show what would be stored without making changes", 254 + ) 255 + 256 + args = parser.parse_args() 257 + 258 + print(f"🏷️ label context backfill - {args.env}") 259 + print("=" * 50) 260 + 261 + asyncio.run(run_backfill(args.env, args.dry_run)) 262 + 263 + 264 + if __name__ == "__main__": 265 + main()