scripts/moderation_agent.py at main · zzstoatzz.io/plyr.fm

zzstoatzz.io / plyr.fm
music on atproto plyr.fm
plyr.fm / scripts / moderation_agent.py
at main 21 kB view raw
  1#!/usr/bin/env -S uv run --script --quiet
  2# /// script
  3# requires-python = ">=3.12"
  4# dependencies = [
  5#     "pydantic-ai>=0.1.0",
  6#     "anthropic",
  7#     "httpx",
  8#     "pydantic>=2.0",
  9#     "pydantic-settings",
 10#     "rich",
 11# ]
 12# ///
 13"""AI-powered moderation review agent for plyr.fm copyright flags.
 14
 15this agent:
 161. fetches all pending copyright flags from the moderation service
 172. analyzes each flag using AI to categorize as likely violation or false positive
 183. presents a summary for human review
 194. bulk resolves flags with human approval
 20
 21usage:
 22    uv run scripts/moderation_agent.py --env prod
 23    uv run scripts/moderation_agent.py --env prod --dry-run
 24    uv run scripts/moderation_agent.py --env staging --auto-resolve
 25
 26environment variables:
    MODERATION_SERVICE_URL - URL of moderation service (default: https://moderation.plyr.fm)
 28    MODERATION_AUTH_TOKEN - auth token for moderation service
 29    ANTHROPIC_API_KEY - API key for Claude
 30"""
 31
 32from __future__ import annotations
 33
 34import argparse
 35import asyncio
 36from dataclasses import dataclass, field
 37from enum import Enum
 38from pathlib import Path
 39from typing import Literal
 40
 41import httpx
 42from pydantic import BaseModel, Field
 43from pydantic_ai import Agent
 44from pydantic_ai.models.anthropic import AnthropicModel
 45from pydantic_settings import BaseSettings, SettingsConfigDict
 46from rich.console import Console
 47from rich.panel import Panel
 48from rich.prompt import Confirm
 49from rich.table import Table
 50
# shared rich console used by the functions below for terminal output
console = Console()
 52
 53
 54# --- settings ---
 55
 56
class AgentSettings(BaseSettings):
    """settings for moderation agent.

    each field is looked up under the upper-case name given as its
    validation alias, and falls back to the default declared here. the two
    secrets default to an empty string, which run_agent treats as "not set".
    """

    model_config = SettingsConfigDict(
        # the .env file sits one level above the directory holding this script
        env_file=Path(__file__).parent.parent / ".env",
        case_sensitive=False,
        # the .env may hold keys this class does not declare; skip them
        extra="ignore",
    )

    # base URL that ModerationClient sends its requests to
    moderation_service_url: str = Field(
        default="https://moderation.plyr.fm",
        validation_alias="MODERATION_SERVICE_URL",
    )
    # sent by ModerationClient as the X-Moderation-Key header
    moderation_auth_token: str = Field(
        default="", validation_alias="MODERATION_AUTH_TOKEN"
    )
    # passed to create_agent for the anthropic provider
    anthropic_api_key: str = Field(default="", validation_alias="ANTHROPIC_API_KEY")
 74
 75
 76# --- models ---
 77
 78
class CopyrightMatch(BaseModel):
    """a potential copyright match from AuDD."""

    # title of the matched (existing) song
    title: str
    # artist credited on the matched song
    artist: str
    # match score reported with the match; SYSTEM_PROMPT tells the model not
    # to lean on it (range not shown in this file - TODO confirm)
    score: float
 85
 86
class LabelContext(BaseModel):
    """context stored with a copyright flag.

    every field is optional, so a flag may carry only part of this data.
    """

    # NOTE(review): presumably the plyr.fm track id - not read in this script
    track_id: int | None = None
    # title of the uploaded track; shown to the model and in the tables
    track_title: str | None = None
    # uploader's handle; rendered with a leading "@"
    artist_handle: str | None = None
    # NOTE(review): presumably the uploader's DID - not read in this script
    artist_did: str | None = None
    # NOTE(review): presumably the best score among matches - not read here
    highest_score: float | None = None
    # candidate matches; the prompt uses the first 5, the tables the first 2
    matches: list[CopyrightMatch] | None = None
    # NOTE(review): presumably filled in once a flag is resolved - confirm
    resolution_reason: str | None = None
    resolution_notes: str | None = None
 98
 99
class FlaggedTrack(BaseModel):
    """a flagged track pending review.

    built from each entry of the "tracks" list returned by /admin/flags.
    """

    # NOTE(review): presumably the label's sequence number - not read here
    seq: int
    # identifies the track; used as the lookup key for analysis results and
    # sent back to the service when resolving
    uri: str
    # label value; presumably "copyright-violation" for these flags - confirm
    val: str
    # kept as the raw string from the service (format not shown in this file)
    created_at: str
    resolved: bool
    # extra details about the flag; may be absent
    context: LabelContext | None = None
109
110
class Category(str, Enum):
    """classification category for a flagged track.

    mirrors the three buckets described in SYSTEM_PROMPT.
    """

    # high confidence this is actual infringement
    LIKELY_VIOLATION = "likely_violation"
    # high confidence this is not infringement
    LIKELY_FALSE_POSITIVE = "likely_false_positive"
    # uncertain; left for human judgment
    NEEDS_REVIEW = "needs_review"
117
118
class ResolutionReason(str, Enum):
    """reason for resolving a false positive.

    the string value is what ModerationClient.resolve_flag sends as "reason".
    """

    # uploader is the matched artist
    ORIGINAL_ARTIST = "original_artist"
    # uploader has rights to use the content
    LICENSED = "licensed"
    # fingerprinting error (unrelated matches)
    FINGERPRINT_NOISE = "fingerprint_noise"
    # legal cover or remix
    COVER_VERSION = "cover_version"
    # fallback used when no more specific reason is suggested
    OTHER = "other"
127
128
class FlagAnalysis(BaseModel):
    """AI analysis of a single flagged track."""

    category: Category
    # validated to lie within [0.0, 1.0]; displayed as a percentage
    confidence: float = Field(ge=0.0, le=1.0)
    # free-text explanation; shown in the tables and stored as resolve notes
    reasoning: str
    # only meaningful for false positives; run_agent falls back to OTHER
    suggested_reason: ResolutionReason | None = None
136
137
class BatchAnalysis(BaseModel):
    """AI analysis of a batch of flagged tracks.

    used both as the structured output of one model call and as the merged
    result that run_agent accumulates across batches.
    """

    # the three lists hold track URIs; the prompt asks the model to put each
    # URI in exactly one of them
    likely_violations: list[str] = Field(
        default_factory=list, description="URIs of tracks likely violating copyright"
    )
    likely_false_positives: list[str] = Field(
        default_factory=list, description="URIs of tracks likely false positives"
    )
    needs_review: list[str] = Field(
        default_factory=list, description="URIs needing human review"
    )
    # required (no default); run_agent overwrites it on the merged result
    summary: str = Field(description="brief summary of the analysis")
    # keyed by track URI
    per_track_analysis: dict[str, FlagAnalysis] = Field(
        default_factory=dict, description="detailed analysis per URI"
    )
154
155
156# --- moderation service client ---
157
158
@dataclass
class ModerationClient:
    """async wrapper around the moderation service's admin endpoints.

    every request is sent to ``base_url`` with the auth token in the
    ``X-Moderation-Key`` header and a 30 second timeout. call ``close()``
    when finished to release the underlying http client.
    """

    base_url: str
    auth_token: str
    _client: httpx.AsyncClient = field(init=False, repr=False)

    def __post_init__(self) -> None:
        # the http client is derived from the two public fields, so callers
        # never pass it in themselves
        self._client = httpx.AsyncClient(
            timeout=30.0,
            headers={"X-Moderation-Key": self.auth_token},
            base_url=self.base_url,
        )

    async def close(self) -> None:
        """close the underlying http client."""
        await self._client.aclose()

    async def list_flags(
        self, filter: Literal["pending", "resolved", "all"] = "pending"
    ) -> list[FlaggedTrack]:
        """fetch flagged tracks matching ``filter``.

        raises on a non-success status; the response body must be a json
        object with a "tracks" list.
        """
        query = {"filter": filter}
        reply = await self._client.get("/admin/flags", params=query)
        reply.raise_for_status()
        raw_tracks = reply.json()["tracks"]
        return [FlaggedTrack.model_validate(entry) for entry in raw_tracks]

    async def resolve_flag(
        self,
        uri: str,
        reason: ResolutionReason,
        notes: str | None = None,
    ) -> dict:
        """resolve (negate) the copyright flag on ``uri``.

        ``notes`` is only included in the request when non-empty. returns the
        service's decoded json response; raises on a non-success status.
        """
        body = {
            "uri": uri,
            "val": "copyright-violation",
            "reason": reason.value,
            **({"notes": notes} if notes else {}),
        }
        reply = await self._client.post("/admin/resolve", json=body)
        reply.raise_for_status()
        return reply.json()
203
204
205# --- agent setup ---
206
# system prompt handed to the agent in create_agent. it names the three
# categories and the resolution reasons, which correspond to the Category and
# ResolutionReason enums above.
SYSTEM_PROMPT = """\
you are a copyright moderation analyst for plyr.fm, a music streaming platform.

your task is to review flagged tracks and categorize them as:
- LIKELY_VIOLATION: high confidence this is actual copyright infringement
- LIKELY_FALSE_POSITIVE: high confidence this is NOT infringement (original artist, licensed, etc.)
- NEEDS_REVIEW: uncertain, requires human judgment

IMPORTANT: do NOT rely heavily on match scores. the scores from our fingerprinting
system are often unreliable (many show 0.00 even for real matches). instead, focus on:

1. TITLE AND ARTIST NAME MATCHING (most important):
   - does the matched song title match or closely resemble the uploaded track title?
   - does the matched artist name match or resemble the uploader's handle/name?
   - are the matched songs well-known commercial tracks?

2. ORIGINAL ARTIST indicators (false positive):
   - artist handle matches or is similar to matched artist name
   - track title matches the uploaded track title exactly
   - artist is likely uploading their own distributed music

3. FINGERPRINT NOISE indicators (false positive):
   - matched songs are from completely different genres
   - matched titles have no relation to uploaded track title
   - multiple unrelated matches with no common theme
   - matched artists are obscure and unrelated to track content

4. LICENSED/COVER indicators (false positive):
   - track explicitly labeled as cover, remix, or tribute in title
   - common phrases in titles suggesting original content

5. LIKELY VIOLATION indicators:
   - matched song title is identical or very similar to uploaded track
   - matched artist is a well-known commercial artist (e.g., major label)
   - matched artist is clearly different from uploader
   - multiple matches to the SAME copyrighted work

be conservative: when in doubt, categorize as NEEDS_REVIEW rather than auto-resolving.
provide clear reasoning for each categorization, focusing on name/title analysis.

for false positives, suggest the most appropriate resolution reason:
- original_artist: uploader is the matched artist
- licensed: uploader has rights to use the content
- fingerprint_noise: audio fingerprinting error (unrelated matches)
- cover_version: legal cover or remix
- other: doesn't fit other categories
"""
254
255
def create_agent(
    api_key: str, model_name: str = "claude-sonnet-4-20250514"
) -> Agent[None, BatchAnalysis]:
    """create the moderation analysis agent.

    args:
        api_key: anthropic API key handed to the provider.
        model_name: anthropic model id to run. defaults to the id this
            script previously hard-coded, so existing callers are unaffected.

    returns:
        an agent that uses SYSTEM_PROMPT and produces a BatchAnalysis as its
        structured output.
    """
    # kept function-local so the provider module is only loaded when an
    # agent is actually built
    from pydantic_ai.providers.anthropic import AnthropicProvider

    provider = AnthropicProvider(api_key=api_key)
    return Agent(
        model=AnthropicModel(model_name, provider=provider),
        output_type=BatchAnalysis,
        system_prompt=SYSTEM_PROMPT,
    )
266
267
268# --- main logic ---
269
270
def format_track_for_analysis(track: FlaggedTrack) -> str:
    """render one flagged track as the plain-text block given to the model.

    the first line is always the URI. when the track has context, the
    uploaded title, uploader handle and up to five matches follow (each only
    if present); otherwise a single "(no context available)" line follows.
    """
    header = f"URI: {track.uri}"
    details = track.context
    if not details:
        return "\n".join([header, "(no context available)"])

    parts = [header]
    if details.track_title:
        parts.append(f"Uploaded Track: {details.track_title}")
    if details.artist_handle:
        parts.append(f"Uploader: @{details.artist_handle}")
    if details.matches:
        parts.append("Copyright Matches:")
        # only the first five matches are shown to keep the prompt short
        parts.extend(
            f"  - '{match.title}' by {match.artist} (score: {match.score:.2f})"
            for match in details.matches[:5]
        )
    return "\n".join(parts)
289
290
def truncate(s: str, max_len: int) -> str:
    """truncate string with ellipsis if needed.

    args:
        s: text to shorten.
        max_len: maximum length of the result, ellipsis included.

    returns:
        ``s`` unchanged when it already fits; otherwise its first
        ``max_len - 1`` characters followed by "…". a ``max_len`` of zero or
        less yields an empty string.
    """
    if len(s) <= max_len:
        return s
    if max_len <= 0:
        # without this guard the slice bound below goes negative and the
        # result would come out longer than the requested maximum
        return ""
    return s[: max_len - 1] + "…"
296
297
def _markup_safe(text: str, max_len: int | None = None) -> str:
    """shorten text to max_len (when given) and escape it for rich output."""
    from rich.markup import escape

    shortened = text if max_len is None else truncate(text, max_len)
    # escape after truncating so the cut can never split an escape sequence
    return escape(shortened)


def _title_cell(ctx: LabelContext | None, max_len: int) -> str:
    """uploaded track title as a table cell, or "-" when unknown."""
    if ctx and ctx.track_title:
        return _markup_safe(ctx.track_title, max_len)
    return "-"


def _uploader_cell(ctx: LabelContext | None, max_len: int) -> str:
    """uploader handle (with leading "@") as a table cell, or "-"."""
    if ctx and ctx.artist_handle:
        return _markup_safe(f"@{ctx.artist_handle}", max_len)
    return "-"


def _matched_artists_cell(ctx: LabelContext | None, max_len: int) -> str:
    """artists of the first two matches as a table cell, or "-"."""
    if ctx and ctx.matches:
        return _markup_safe(", ".join(m.artist for m in ctx.matches[:2]), max_len)
    return "-"


def _context_and_info(
    analysis: BatchAnalysis, tracks: dict[str, FlaggedTrack], uri: str
) -> tuple[LabelContext | None, FlagAnalysis | None]:
    """look up the flag context and the per-track analysis for one URI."""
    track = tracks.get(uri)
    return (track.context if track else None), analysis.per_track_analysis.get(uri)


def display_analysis_summary(
    analysis: BatchAnalysis,
    tracks: dict[str, FlaggedTrack],
) -> None:
    """display a rich summary of the analysis.

    prints the summary panel, then one table per non-empty category, then the
    totals. URIs without a matching entry in ``tracks`` or in
    ``per_track_analysis`` are still listed, with "-" in the missing cells.

    track titles, handles, matched artists and model reasoning are escaped
    before printing: rich treats bracketed text in strings as style markup,
    so unescaped text such as "song [live]" would lose characters.

    args:
        analysis: categorized URIs plus per-track details.
        tracks: flagged tracks keyed by URI.
    """
    console.print()
    console.print(
        Panel(
            _markup_safe(analysis.summary),
            title="analysis summary",
            border_style="blue",
        )
    )

    # likely violations
    if analysis.likely_violations:
        table = Table(
            title="likely violations",
            border_style="red",
            show_lines=True,
            padding=(0, 1),
        )
        table.add_column("#", style="dim", width=3)
        table.add_column("track", style="red", max_width=25)
        table.add_column("matches", max_width=30)
        table.add_column("conf", width=5)
        table.add_column("reasoning", max_width=50)

        for i, uri in enumerate(analysis.likely_violations, 1):
            ctx, info = _context_and_info(analysis, tracks, uri)
            table.add_row(
                str(i),
                _title_cell(ctx, 24),
                _matched_artists_cell(ctx, 29),
                f"{info.confidence:.0%}" if info else "-",
                _markup_safe(info.reasoning, 49) if info else "-",
            )

        console.print(table)

    # likely false positives
    if analysis.likely_false_positives:
        table = Table(
            title="likely false positives",
            border_style="green",
            padding=(0, 1),
        )
        table.add_column("#", style="dim", width=3)
        table.add_column("track", style="green", max_width=30)
        table.add_column("artist", max_width=18)
        table.add_column("reason", width=18)
        table.add_column("conf", width=5)

        for i, uri in enumerate(analysis.likely_false_positives, 1):
            ctx, info = _context_and_info(analysis, tracks, uri)
            table.add_row(
                str(i),
                _title_cell(ctx, 29),
                _uploader_cell(ctx, 17),
                info.suggested_reason.value if info and info.suggested_reason else "-",
                f"{info.confidence:.0%}" if info else "-",
            )

        console.print(table)

        # the table has no reasoning column, so show it in full below
        console.print()
        console.print("[bold]reasoning:[/bold]")
        for i, uri in enumerate(analysis.likely_false_positives, 1):
            info = analysis.per_track_analysis.get(uri)
            if info:
                console.print(f"  [dim]{i}.[/dim] {_markup_safe(info.reasoning)}")

    # needs review
    if analysis.needs_review:
        table = Table(
            title="needs manual review",
            border_style="yellow",
            show_lines=True,
            padding=(0, 1),
        )
        table.add_column("#", style="dim", width=3)
        table.add_column("track", style="yellow", max_width=25)
        table.add_column("artist", max_width=15)
        table.add_column("matches", max_width=25)
        table.add_column("reasoning", max_width=50)

        for i, uri in enumerate(analysis.needs_review, 1):
            ctx, info = _context_and_info(analysis, tracks, uri)
            table.add_row(
                str(i),
                _title_cell(ctx, 24),
                _uploader_cell(ctx, 14),
                _matched_artists_cell(ctx, 24),
                _markup_safe(info.reasoning, 49) if info else "-",
            )

        console.print(table)

    # summary stats
    console.print()
    console.print("[bold]totals:[/bold]")
    console.print(f"  likely violations: [red]{len(analysis.likely_violations)}[/red]")
    console.print(
        f"  likely false positives: [green]{len(analysis.likely_false_positives)}[/green]"
    )
    console.print(f"  needs review: [yellow]{len(analysis.needs_review)}[/yellow]")
425
426
def _reconcile_batch(
    result: BatchAnalysis, batch_uris: list[str]
) -> tuple[BatchAnalysis, int, int]:
    """make a model response cover exactly the URIs that were sent to it.

    URIs the model returned that were not in the batch are dropped. a batch
    URI that the model left out, or listed in more than one category, is
    placed in needs_review. returns the cleaned analysis, the number of
    unknown URIs dropped, and the number of tracks rerouted to needs_review.
    """
    expected = list(dict.fromkeys(batch_uris))  # de-duplicated, order kept
    known = set(expected)
    claimed = {
        "likely_violations": result.likely_violations,
        "likely_false_positives": result.likely_false_positives,
        "needs_review": result.needs_review,
    }

    placements: dict[str, set[str]] = {}
    unknown: set[str] = set()
    for bucket, uris in claimed.items():
        for uri in uris:
            if uri in known:
                placements.setdefault(uri, set()).add(bucket)
            else:
                unknown.add(uri)

    sorted_uris: dict[str, list[str]] = {bucket: [] for bucket in claimed}
    rerouted = 0
    for uri in expected:
        buckets = placements.get(uri, set())
        if len(buckets) == 1:
            (bucket,) = buckets
        else:
            # missing or contradictory verdict: leave it to a human
            bucket = "needs_review"
            rerouted += 1
        sorted_uris[bucket].append(uri)

    cleaned = BatchAnalysis(
        likely_violations=sorted_uris["likely_violations"],
        likely_false_positives=sorted_uris["likely_false_positives"],
        needs_review=sorted_uris["needs_review"],
        summary=result.summary,
        per_track_analysis={
            uri: info
            for uri, info in result.per_track_analysis.items()
            if uri in known
        },
    )
    return cleaned, len(unknown), rerouted


async def run_agent(
    env: str,
    dry_run: bool = False,
    auto_resolve: bool = False,
    limit: int | None = None,
) -> None:
    """run the moderation agent.

    fetches pending flags, has the model categorize them in batches of 20,
    prints the merged result, and then (unless ``dry_run``) resolves the
    likely false positives after confirmation.

    model output is reconciled against the URIs actually sent in each batch
    before it is used, so only flags that were fetched in this run can ever
    be passed to the resolve endpoint.

    args:
        env: environment name; only printed, it does not select a service.
        dry_run: analyze and display, but never resolve anything.
        auto_resolve: resolve likely false positives without asking.
        limit: when truthy, only the first ``limit`` pending flags are used.
    """
    from rich.markup import escape

    settings = AgentSettings()

    if not settings.moderation_auth_token:
        console.print("[red]error: MODERATION_AUTH_TOKEN not set[/red]")
        return

    if not settings.anthropic_api_key:
        console.print("[red]error: ANTHROPIC_API_KEY not set[/red]")
        return

    console.print(f"[bold]moderation agent[/bold] - {env}")
    console.print(f"service: {settings.moderation_service_url}")
    console.print()

    # fetch pending flags
    client = ModerationClient(
        base_url=settings.moderation_service_url,
        auth_token=settings.moderation_auth_token,
    )

    try:
        console.print("[dim]fetching pending flags...[/dim]")
        flags = await client.list_flags(filter="pending")

        if not flags:
            console.print("[green]no pending flags[/green]")
            return

        if limit:
            flags = flags[:limit]
            console.print(f"[bold]found {len(flags)} pending flags (limited)[/bold]")
        else:
            console.print(f"[bold]found {len(flags)} pending flags[/bold]")

        # build analysis prompt
        tracks_by_uri = {f.uri: f for f in flags}
        track_descriptions = [format_track_for_analysis(f) for f in flags]

        # process in batches to avoid context limits
        batch_size = 20
        all_analyses: list[BatchAnalysis] = []
        agent = create_agent(settings.anthropic_api_key)

        for batch_start in range(0, len(flags), batch_size):
            batch_end = min(batch_start + batch_size, len(flags))
            batch_flags = flags[batch_start:batch_end]
            batch_descriptions = track_descriptions[batch_start:batch_end]

            console.print(
                f"[dim]analyzing batch {batch_start // batch_size + 1} "
                f"({batch_start + 1}-{batch_end} of {len(flags)})...[/dim]"
            )

            track_sections = "\n".join(
                f"### Track {i + 1}\n{desc}\n"
                for i, desc in enumerate(batch_descriptions)
            )
            prompt = f"""\
analyze these {len(batch_flags)} flagged tracks and categorize EACH one.

IMPORTANT: You MUST include EVERY track URI in exactly one of these lists:
- likely_violations
- likely_false_positives
- needs_review

Also provide per_track_analysis with details for each URI.

---
{track_sections}
---

For each track:
1. Add its URI to the appropriate category list
2. Add an entry to per_track_analysis with the URI as key
3. Include confidence (0.0-1.0), reasoning, and suggested_reason for false positives
"""

            result = await agent.run(prompt)

            # never trust the model's URI lists as-is: it may invent URIs,
            # skip tracks, or list one track under several categories
            cleaned, dropped, rerouted = _reconcile_batch(
                result.output, [f.uri for f in batch_flags]
            )
            if dropped or rerouted:
                console.print(
                    f"[yellow]warning: ignored {dropped} unknown URIs and moved "
                    f"{rerouted} uncategorized or conflicting tracks to needs review[/yellow]"
                )
            all_analyses.append(cleaned)

        # merge all batch results
        analysis = BatchAnalysis(
            likely_violations=[],
            likely_false_positives=[],
            needs_review=[],
            summary="",
            per_track_analysis={},
        )
        for batch in all_analyses:
            analysis.likely_violations.extend(batch.likely_violations)
            analysis.likely_false_positives.extend(batch.likely_false_positives)
            analysis.needs_review.extend(batch.needs_review)
            analysis.per_track_analysis.update(batch.per_track_analysis)

        # generate summary
        analysis.summary = (
            f"analyzed {len(flags)} tracks: "
            f"{len(analysis.likely_violations)} likely violations, "
            f"{len(analysis.likely_false_positives)} likely false positives, "
            f"{len(analysis.needs_review)} need review"
        )

        # debug: show raw counts
        console.print(
            f"[dim]raw analysis: {len(analysis.likely_violations)} violations, "
            f"{len(analysis.likely_false_positives)} false positives, "
            f"{len(analysis.needs_review)} needs review[/dim]"
        )
        console.print(
            f"[dim]per_track_analysis entries: {len(analysis.per_track_analysis)}[/dim]"
        )

        # display results
        display_analysis_summary(analysis, tracks_by_uri)

        if dry_run:
            console.print("\n[yellow][DRY RUN] no changes made[/yellow]")
            return

        # handle false positives
        if analysis.likely_false_positives:
            console.print()

            if auto_resolve:
                proceed = True
                console.print(
                    "[yellow]auto-resolve enabled - proceeding without confirmation[/yellow]"
                )
            else:
                proceed = Confirm.ask(
                    f"resolve {len(analysis.likely_false_positives)} likely false positives?"
                )

            if proceed:
                resolved = 0
                for uri in analysis.likely_false_positives:
                    track_analysis = analysis.per_track_analysis.get(uri)
                    reason = (
                        track_analysis.suggested_reason
                        if track_analysis and track_analysis.suggested_reason
                        else ResolutionReason.OTHER
                    )
                    notes = (
                        f"AI analysis: {track_analysis.reasoning}"
                        if track_analysis
                        else "AI categorized as false positive"
                    )

                    # deliberately broad: one failed resolve is reported and
                    # the remaining flags are still attempted
                    try:
                        await client.resolve_flag(uri, reason, notes)
                        resolved += 1
                        console.print(
                            f"  [green]\u2713[/green] resolved: {uri[:60]}..."
                        )
                    except Exception as e:
                        # error text is escaped so brackets in it are not
                        # interpreted as rich markup
                        console.print(
                            f"  [red]\u2717[/red] failed: {uri[:60]}... ({escape(str(e))})"
                        )

                console.print(f"\n[green]resolved {resolved} flags[/green]")

    finally:
        await client.close()
593
594
def main() -> None:
    """parse command-line options and run the agent to completion."""
    parser = argparse.ArgumentParser(description="AI moderation review agent")
    parser.add_argument(
        "--env",
        type=str,
        default="prod",
        choices=["dev", "staging", "prod"],
        help="environment (for display only)",
    )

    # the two boolean switches differ only in name and help text
    switches = (
        ("--dry-run", "analyze flags without making changes"),
        ("--auto-resolve", "resolve false positives without confirmation"),
    )
    for flag, description in switches:
        parser.add_argument(flag, action="store_true", help=description)

    parser.add_argument(
        "--limit",
        type=int,
        default=None,
        help="limit number of flags to process",
    )

    options = parser.parse_args()
    job = run_agent(
        options.env,
        options.dry_run,
        options.auto_resolve,
        options.limit,
    )
    asyncio.run(job)
625
626
# run the CLI only when this file is executed as a script, not when imported
if __name__ == "__main__":
    main()