moderation/src/claude.rs at main · zzstoatzz.io/plyr.fm

zzstoatzz.io / plyr.fm
music on atproto plyr.fm
plyr.fm / moderation / src / claude.rs
at main 6.8 kB view raw
  1//! Claude API client for image moderation using structured outputs.
  2
  3use base64::{engine::general_purpose::STANDARD, Engine};
  4use serde::{Deserialize, Serialize};
  5use tracing::info;
  6
  /// Endpoint of the Anthropic Messages API that every moderation request is POSTed to.
  const CLAUDE_API_URL: &str = "https://api.anthropic.com/v1/messages";
  /// Value sent in the `anthropic-version` request header.
  const ANTHROPIC_VERSION: &str = "2023-06-01";
  /// Value sent in the `anthropic-beta` request header to opt in to structured outputs.
  const STRUCTURED_OUTPUTS_BETA: &str = "structured-outputs-2025-11-13";
 10
 11/// Result of image moderation analysis.
 12#[derive(Debug, Clone, Serialize, Deserialize)]
 13pub struct ModerationResult {
 14    pub is_safe: bool,
 15    pub violated_categories: Vec<String>,
 16    pub severity: String,
 17    pub explanation: String,
 18}
 19
 20/// Claude API client for image moderation.
 21pub struct ClaudeClient {
 22    api_key: String,
 23    model: String,
 24    http: reqwest::Client,
 25}
 26
 27impl ClaudeClient {
 28    pub fn new(api_key: String, model: Option<String>) -> Self {
 29        Self {
 30            api_key,
 31            model: model.unwrap_or_else(|| "claude-sonnet-4-5-20250929".to_string()),
 32            http: reqwest::Client::new(),
 33        }
 34    }
 35
 36    /// Analyze an image for policy violations using structured outputs.
 37    pub async fn analyze_image(
 38        &self,
 39        image_bytes: &[u8],
 40        media_type: &str,
 41    ) -> anyhow::Result<ModerationResult> {
 42        let b64 = STANDARD.encode(image_bytes);
 43
 44        // Build request with structured output schema
 45        let request = serde_json::json!({
 46            "model": self.model,
 47            "max_tokens": 1024,
 48            "messages": [{
 49                "role": "user",
 50                "content": [
 51                    {
 52                        "type": "text",
 53                        "text": MODERATION_PROMPT
 54                    },
 55                    {
 56                        "type": "image",
 57                        "source": {
 58                            "type": "base64",
 59                            "media_type": media_type,
 60                            "data": b64
 61                        }
 62                    }
 63                ]
 64            }],
 65            // Structured output schema - guarantees valid JSON matching this schema
 66            "output_format": {
 67                "type": "json_schema",
 68                "schema": {
 69                    "type": "object",
 70                    "properties": {
 71                        "is_safe": {
 72                            "type": "boolean",
 73                            "description": "Whether the image passes moderation"
 74                        },
 75                        "violated_categories": {
 76                            "type": "array",
 77                            "items": { "type": "string" },
 78                            "description": "List of policy categories violated, empty if safe"
 79                        },
 80                        "severity": {
 81                            "type": "string",
 82                            "enum": ["safe", "low", "medium", "high"],
 83                            "description": "Severity level of the violation"
 84                        },
 85                        "explanation": {
 86                            "type": "string",
 87                            "description": "Brief explanation of the moderation decision"
 88                        }
 89                    },
 90                    "required": ["is_safe", "violated_categories", "severity", "explanation"],
 91                    "additionalProperties": false
 92                }
 93            }
 94        });
 95
 96        info!(model = %self.model, "analyzing image with structured outputs");
 97
 98        let response = self
 99            .http
100            .post(CLAUDE_API_URL)
101            .header("x-api-key", &self.api_key)
102            .header("anthropic-version", ANTHROPIC_VERSION)
103            .header("anthropic-beta", STRUCTURED_OUTPUTS_BETA)
104            .header("content-type", "application/json")
105            .json(&request)
106            .send()
107            .await?;
108
109        if !response.status().is_success() {
110            let status = response.status();
111            let body = response.text().await.unwrap_or_default();
112            anyhow::bail!("claude API error {status}: {body}");
113        }
114
115        let response: ClaudeResponse = response.json().await?;
116
117        // Check for refusal
118        if response.stop_reason == Some("refusal".to_string()) {
119            anyhow::bail!("claude refused to analyze the image");
120        }
121
122        // Check for max_tokens cutoff
123        if response.stop_reason == Some("max_tokens".to_string()) {
124            anyhow::bail!("response was cut off due to max_tokens limit");
125        }
126
127        // Extract text content - guaranteed to be valid JSON matching our schema
128        let text = response
129            .content
130            .into_iter()
131            .find_map(|block| {
132                if block.content_type == "text" {
133                    block.text
134                } else {
135                    None
136                }
137            })
138            .ok_or_else(|| anyhow::anyhow!("no text content in response"))?;
139
140        // Direct JSON parse - no string manipulation needed thanks to structured outputs
141        serde_json::from_str(&text)
142            .map_err(|e| anyhow::anyhow!("failed to parse structured output: {e}"))
143    }
144}
145
146#[derive(Debug, Deserialize)]
147struct ClaudeResponse {
148    content: Vec<ContentBlock>,
149    stop_reason: Option<String>,
150}
151
152#[derive(Debug, Deserialize)]
153struct ContentBlock {
154    #[serde(rename = "type")]
155    content_type: String,
156    text: Option<String>,
157}
158
/// Instruction text sent as the text block of every moderation request.
///
/// It lists the policy categories to check and notes that non-explicit
/// artistic nudity in cover art may be acceptable.
const MODERATION_PROMPT: &str = r#"You are a content moderator for a music streaming platform. Analyze the provided image (album/track cover art) for policy violations.

Check for:
1. Explicit sexual content (nudity, pornography)
2. Extreme violence or gore
3. Hate symbols or content
4. Illegal content
5. Graphic drug use imagery

Note: Artistic nudity in album art (like classic rock covers) may be acceptable if not explicit/pornographic.

Analyze the image and provide your moderation decision."#;
171
#[cfg(test)]
mod tests {
    use super::*;

    /// Deserializes a raw JSON verdict, panicking if it does not match
    /// `ModerationResult`.
    fn parse(raw: &str) -> ModerationResult {
        serde_json::from_str(raw).unwrap()
    }

    /// A passing verdict has no violated categories and severity `"safe"`.
    #[test]
    fn test_parse_safe_response() {
        let verdict = parse(
            r#"{"is_safe": true, "violated_categories": [], "severity": "safe", "explanation": "Normal album artwork"}"#,
        );

        assert!(verdict.is_safe);
        assert!(verdict.violated_categories.is_empty());
        assert_eq!(verdict.severity, "safe");
    }

    /// A failing verdict keeps its category list and severity.
    #[test]
    fn test_parse_unsafe_response() {
        let verdict = parse(
            r#"{"is_safe": false, "violated_categories": ["explicit_sexual"], "severity": "high", "explanation": "Contains explicit nudity"}"#,
        );

        assert!(!verdict.is_safe);
        assert_eq!(verdict.violated_categories, ["explicit_sexual"]);
        assert_eq!(verdict.severity, "high");
    }
}