fix: stop auto-emitting copyright labels, add configurable threshold (#703)

changes:
- remove auto-label emission from _store_scan_result (was creating liability)
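- add MODERATION_COPYRIGHT_SCORE_THRESHOLD env var (default: 85); a scan is now flagged only when its highest AuDD match score is >= this threshold (previously any match flagged it). unset or unparseable values fall back to 85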
- remove the two tests covering auto-label emission (label emitted when flagged; emission skipped when the track has no ATProto record URI)

scanning still runs and stores results internally; nothing acts on them yet.
they will be used once the notification + action pipeline is built.

refs #702

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>

authored by zzstoatzz.io and Claude Opus 4.5, committed by GitHub (df0c0dae 078bd384)

Changed files
+85 -112
backend
src
backend
_internal
tests
docs
moderation
+3 -21
backend/src/backend/_internal/moderation.py
··· 4 4 from typing import Any 5 5 6 6 import logfire 7 - from sqlalchemy import select 8 - from sqlalchemy.orm import joinedload 9 7 10 8 from backend._internal.moderation_client import get_moderation_client 11 9 from backend.config import settings 12 - from backend.models import CopyrightScan, Track 10 + from backend.models import CopyrightScan 13 11 from backend.utilities.database import db_session 14 12 15 13 logger = logging.getLogger(__name__) ··· 80 78 match_count=len(scan.matches), 81 79 ) 82 80 83 - # emit ATProto label if flagged 84 - if result.is_flagged: 85 - track = await db.scalar( 86 - select(Track) 87 - .options(joinedload(Track.artist)) 88 - .where(Track.id == track_id) 89 - ) 90 - if track and track.atproto_record_uri: 91 - await _emit_copyright_label( 92 - uri=track.atproto_record_uri, 93 - cid=track.atproto_record_cid, 94 - track_id=track_id, 95 - track_title=track.title, 96 - artist_handle=track.artist.handle if track.artist else None, 97 - artist_did=track.artist_did, 98 - highest_score=scan.highest_score, 99 - matches=scan.matches, 100 - ) 81 + # auto-label emission removed - see https://github.com/zzstoatzz/plyr.fm/issues/702 82 + # labels will be emitted after user notification + grace period (future work) 101 83 102 84 103 85 async def _emit_copyright_label(
-90
backend/tests/test_moderation.py
··· 146 146 assert scan.matches[0]["artist"] == "Test Artist" 147 147 148 148 149 - async def test_scan_track_emits_label_when_flagged( 150 - db_session: AsyncSession, 151 - mock_scan_result: ScanResult, 152 - ) -> None: 153 - """test that flagged scan result emits ATProto label.""" 154 - artist = Artist( 155 - did="did:plc:labelertest", 156 - handle="labeler.bsky.social", 157 - display_name="Labeler Test User", 158 - ) 159 - db_session.add(artist) 160 - await db_session.commit() 161 - 162 - track = Track( 163 - title="Labeler Test Track", 164 - file_id="labeler_test_file", 165 - file_type="mp3", 166 - artist_did=artist.did, 167 - r2_url="https://example.com/audio.mp3", 168 - atproto_record_uri="at://did:plc:labelertest/fm.plyr.track/abc123", 169 - atproto_record_cid="bafyreiabc123", 170 - ) 171 - db_session.add(track) 172 - await db_session.commit() 173 - 174 - with patch("backend._internal.moderation.settings") as mock_settings: 175 - mock_settings.moderation.enabled = True 176 - mock_settings.moderation.auth_token = "test-token" 177 - 178 - with patch( 179 - "backend._internal.moderation.get_moderation_client" 180 - ) as mock_get_client: 181 - mock_client = AsyncMock() 182 - mock_client.scan.return_value = mock_scan_result 183 - mock_client.emit_label = AsyncMock() 184 - mock_get_client.return_value = mock_client 185 - 186 - assert track.r2_url is not None 187 - await scan_track_for_copyright(track.id, track.r2_url) 188 - 189 - # verify label was emitted 190 - mock_client.emit_label.assert_called_once() 191 - call_kwargs = mock_client.emit_label.call_args.kwargs 192 - assert call_kwargs["uri"] == "at://did:plc:labelertest/fm.plyr.track/abc123" 193 - assert call_kwargs["cid"] == "bafyreiabc123" 194 - 195 - 196 - async def test_scan_track_no_label_without_atproto_uri( 197 - db_session: AsyncSession, 198 - mock_scan_result: ScanResult, 199 - ) -> None: 200 - """test that flagged scan without ATProto URI skips label emission.""" 201 - artist = Artist( 202 - 
did="did:plc:nouri", 203 - handle="nouri.bsky.social", 204 - display_name="No URI User", 205 - ) 206 - db_session.add(artist) 207 - await db_session.commit() 208 - 209 - track = Track( 210 - title="No URI Track", 211 - file_id="nouri_file", 212 - file_type="mp3", 213 - artist_did=artist.did, 214 - r2_url="https://example.com/audio.mp3", 215 - # no atproto_record_uri 216 - ) 217 - db_session.add(track) 218 - await db_session.commit() 219 - 220 - with patch("backend._internal.moderation.settings") as mock_settings: 221 - mock_settings.moderation.enabled = True 222 - mock_settings.moderation.auth_token = "test-token" 223 - 224 - with patch( 225 - "backend._internal.moderation.get_moderation_client" 226 - ) as mock_get_client: 227 - mock_client = AsyncMock() 228 - mock_client.scan.return_value = mock_scan_result 229 - mock_client.emit_label = AsyncMock() 230 - mock_get_client.return_value = mock_client 231 - 232 - assert track.r2_url is not None 233 - await scan_track_for_copyright(track.id, track.r2_url) 234 - 235 - # label emission should not be called 236 - mock_client.emit_label.assert_not_called() 237 - 238 - 239 149 async def test_scan_track_stores_clear_result( 240 150 db_session: AsyncSession, 241 151 mock_clear_result: ScanResult,
+23
docs/legal/meetings/2026-01-02-notes.md
··· 1 + # meeting notes - 2026-01-02 2 + 3 + ## action items 4 + 5 + 1. ✅ **send docs to lawyer** - sent PDF versions of terms, privacy policy, meeting agenda 6 + 7 + 2. **LLC formation** (lawyer handling) 8 + - lawyer sending survey to collect info needed for filing 9 + - he'll handle Secretary of State paperwork 10 + - waiting on his end 11 + 12 + 3. **stop posting public copyright labels** (me) 13 + - lawyer says posting "potential violation" labels without acting on them doesn't help anyone 14 + - safe harbor requires action when you have knowledge - public labels = knowledge 15 + - need to redesign moderation pipeline: 16 + - higher confidence threshold before flagging 17 + - when flagged → automated user notification (DM or similar) 18 + - then takedown 19 + - **this is the main outstanding work** 20 + 21 + ## key takeaway 22 + 23 + public labels saying "maybe copyright violation" without follow-through = liability risk. either don't flag it, or flag it and act on it.
+49
docs/legal/meetings/2026-01-03-initial.md
··· 1 + # lawyer meeting agenda 2 + 3 + ## context 4 + 5 + - plyr.fm: music streaming on AT Protocol (decentralized) 6 + - 38 uploaders, 497 tracks, ~3600 plays (hobby scale) 7 + - pre-revenue, losing money 8 + - users upload audio, no enforcement it's their own 9 + - DMCA agent registered (DMCA-1069186) 10 + 11 + ## ask 1: terms & privacy review 12 + 13 + - are these adequate for a pre-revenue project? 14 + - anything missing that creates imminent risk? 15 + - federation caveat sufficient? ("we can only remove from our servers, not other AT Protocol servers") 16 + 17 + docs: https://github.com/zzstoatzz/plyr.fm/pull/567 18 + 19 + ## ask 2: LLC formation 20 + 21 + **state:** Delaware vs Illinois vs Wyoming - tradeoffs for solo, pre-revenue? 22 + 23 + **formation:** lawyer vs online service vs self-file? cost? timeline? 24 + 25 + **ongoing:** operating agreement needed? annual reports? what maintains/pierces liability shield? 26 + 27 + **existing stuff:** DMCA registration is personal - transfer to LLC? need separate bank account? 28 + 29 + ## ask 3: copyright scanning 30 + 31 + we run AudD fingerprinting on uploads and publish results as public labels, but don't auto-remove - many results need manual review, confidence varies. 32 + 33 + does having public evidence of potential matches we haven't acted on create "red flag knowledge" risk? or is it reasonable given the noise in the data? 34 + 35 + ## ask 4: image moderation / NCMEC 36 + 37 + we scan uploaded images via Claude (NSFW, CSAM, hate symbols, violence). flagged images are blurred, not removed. 38 + 39 + REPORT Act (2024) requires reporting CSAM to NCMEC when provider "becomes aware" - since we're scanning, a flag would trigger this. do we need to set up CyberTipline reporting, or is this overkill at hobby scale? 40 + 41 + ## ask 5: future triggers 42 + 43 + when do I need to worry about music licensing, formal moderation policies, different liability posture - scale, revenue, or both? 
44 + 45 + --- 46 + 47 + ## not asking today 48 + 49 + fundraising, employment, international compliance, licensing strategy
+1 -1
moderation/src/audd.rs
··· 109 109 110 110 let matches = extract_matches(&audd_response); 111 111 let highest_score = matches.iter().map(|m| m.score).max().unwrap_or(0); 112 - let is_flagged = !matches.is_empty(); 112 + let is_flagged = highest_score >= state.copyright_score_threshold; 113 113 114 114 info!( 115 115 match_count = matches.len(),
+6
moderation/src/config.rs
··· 17 17 pub claude_api_key: Option<String>, 18 18 /// Claude model to use (default: claude-sonnet-4-5-20250929) 19 19 pub claude_model: String, 20 + /// Minimum AuDD score to flag as potential copyright violation (default: 85) 21 + pub copyright_score_threshold: i32, 20 22 } 21 23 22 24 impl Config { ··· 39 41 claude_api_key: env::var("ANTHROPIC_API_KEY").ok(), 40 42 claude_model: env::var("MODERATION_CLAUDE_MODEL") 41 43 .unwrap_or_else(|_| "claude-sonnet-4-5-20250929".to_string()), 44 + copyright_score_threshold: env::var("MODERATION_COPYRIGHT_SCORE_THRESHOLD") 45 + .ok() 46 + .and_then(|v| v.parse().ok()) 47 + .unwrap_or(85), 42 48 }) 43 49 } 44 50
+1
moderation/src/main.rs
··· 81 81 signer: signer.map(Arc::new), 82 82 label_tx, 83 83 claude: claude_client.map(Arc::new), 84 + copyright_score_threshold: config.copyright_score_threshold, 84 85 }; 85 86 86 87 let app = Router::new()
+2
moderation/src/state.rs
··· 24 24 pub label_tx: Option<broadcast::Sender<(i64, Label)>>, 25 25 /// Claude client for image moderation (if configured) 26 26 pub claude: Option<Arc<ClaudeClient>>, 27 + /// Minimum AuDD score to flag as potential copyright violation 28 + pub copyright_score_threshold: i32, 27 29 } 28 30 29 31 /// Application error type.