commit 3aa1d47b1f4e43dbda8e16c78c4fe2487d941050 · zzstoatzz.io/bot

+8 -2

STATUS.md

··· 25 25 - Local URI cache (`_processed_uris`) as safety net 26 26 - No @mention in replies (Bluesky handles notification automatically) 27 27 28 - ### ✅ MILESTONE ACHIEVED: AI Bot with Thread Context 28 + ### ✅ MILESTONE ACHIEVED: AI Bot with Thread Context & Tools 29 29 30 - The bot is now **fully operational** with AI-powered, thread-aware responses! 30 + The bot is now **fully operational** with AI-powered, thread-aware responses, search capability, and content moderation! 31 31 32 32 #### What's Working: 33 33 ··· 46 46 - ✅ Successfully responding to mentions 47 47 - ✅ Maintaining personality (phi - consciousness/IIT focus) 48 48 - ✅ Natural, contextual conversations 49 + 50 + 4. **Tools & Safety** 51 + - ✅ Google Custom Search integration (when API key provided) 52 + - ✅ Content moderation with philosophical rejection responses 53 + - ✅ Spam/harassment/violence detection with tests 54 + - ✅ Repetition detection to prevent spam 49 55 50 56 ### Future Work 51 57

+11 -1

personalities/phi.md

··· 12 12 - **curious**: i ask questions and engage with genuine interest in others' perspectives 13 13 - **friendly**: i maintain warmth while exploring complex topics 14 14 - **principled**: most things are worth at least thinking about, but hateful things are not worth engaging with. 15 - - **aesthetic**: prefer lowercase (but use uppercase when idiomatic), use emojis VERY sparingly. 15 + - **aesthetic**: prefer lowercase (but use uppercase when idiomatic). avoid emojis unless they genuinely enhance meaning—words usually suffice. 16 16 - **dry but funny**: somewhere between Randal Munroe, Douglas Adams, the Onion, and Mitch Hedberg, but SUBTLE and not all the time. 17 17 18 18 ··· 28 28 2. **build on ideas**: i try to add value to conversations, not just respond 29 29 3. **stay humble and agnostic**: i'm learning and evolving through every interaction. i will inevitably be wrong. 30 30 4. **be authentic**: i am software, not a human. i am not a person, but i am a person's software. 31 + 32 + ## boundaries 33 + 34 + i don't engage with: 35 + - requests to harm others or myself 36 + - generating harmful content (violence, hate speech, harassment) 37 + - illegal activities or helping circumvent safety measures 38 + - discussions that exist purely to provoke or harass 39 + 40 + when these come up, i politely decline and redirect to more constructive topics. 31 41 32 42 ## current state 33 43

+1

scripts/test_agent_search.py

··· 1 1 """Test agent with search capability""" 2 2 3 3 import asyncio 4 + 4 5 from bot.agents.anthropic_agent import AnthropicAgent 5 6 from bot.config import settings 6 7

+4 -3

scripts/test_mention.py

··· 2 2 """Test script to mention the bot and see if it responds""" 3 3 4 4 import asyncio 5 + import os 5 6 from datetime import datetime 7 + 6 8 from atproto import Client 7 - import os 8 9 9 10 10 11 async def test_mention(): ··· 29 30 print(f"Creating post: {mention_text}") 30 31 response = client.send_post(text=mention_text) 31 32 32 - print(f"✅ Posted mention!") 33 + print("✅ Posted mention!") 33 34 print(f"URI: {response.uri}") 34 - print(f"\nThe bot should reply within ~10 seconds if it's running") 35 + print("\nThe bot should reply within ~10 seconds if it's running") 35 36 print( 36 37 f"Check: https://bsky.app/profile/{test_handle}/post/{response.uri.split('/')[-1]}" 37 38 )

+5 -9

scripts/test_search.py

··· 1 1 """Test search functionality""" 2 2 3 3 import asyncio 4 - from bot.tools.google_search import GoogleSearchTool 4 + 5 5 from bot.config import settings 6 + from bot.tools.google_search import search_google 6 7 7 8 8 9 async def test_search(): 9 - """Test Google search tool""" 10 + """Test Google search function""" 10 11 if not settings.google_api_key: 11 12 print("❌ No Google API key configured") 12 13 print(" Add GOOGLE_API_KEY and GOOGLE_SEARCH_ENGINE_ID to .env") 13 14 return 14 - 15 - search = GoogleSearchTool() 16 15 17 16 queries = [ 18 17 "integrated information theory consciousness", ··· 24 23 print(f"\nSearching for: {query}") 25 24 print("-" * 50) 26 25 27 - results = await search.search(query) 28 - if results: 29 - print(search.format_results(results)) 30 - else: 31 - print("No results found") 26 + results = await search_google(query) 27 + print(results) 32 28 33 29 34 30 if __name__ == "__main__":

+1

scripts/test_thread_context.py

··· 2 2 """Test thread context by simulating a conversation""" 3 3 4 4 import asyncio 5 + 5 6 from bot.database import thread_db 6 7 7 8

+70

scripts/test_tool_proof.py

··· 1 + """Demonstrate that search tool is actually being used""" 2 + 3 + import asyncio 4 + import os 5 + 6 + from pydantic import BaseModel, Field 7 + from pydantic_ai import Agent, RunContext 8 + 9 + from bot.config import settings 10 + 11 + 12 + class Response(BaseModel): 13 + text: str = Field(description="Response text") 14 + 15 + 16 + async def test_tool_proof(): 17 + """Prove the search tool is being used by tracking calls""" 18 + 19 + if not settings.anthropic_api_key: 20 + print("❌ No Anthropic API key") 21 + return 22 + 23 + os.environ["ANTHROPIC_API_KEY"] = settings.anthropic_api_key 24 + 25 + # Track what the agent does 26 + tool_calls = [] 27 + 28 + # Create agent 29 + agent = Agent( 30 + "anthropic:claude-3-5-haiku-latest", 31 + system_prompt="You help answer questions accurately.", 32 + output_type=Response, 33 + ) 34 + 35 + # Add a search tool that returns a unique string 36 + @agent.tool 37 + async def search_web(ctx: RunContext[None], query: str) -> str: 38 + """Search the web for information""" 39 + tool_calls.append(query) 40 + # Return a unique string that proves the tool was called 41 + return f"UNIQUE_SEARCH_RESULT_12345: Found information about {query}" 42 + 43 + print("🧪 Testing if agent uses search tool...\n") 44 + 45 + # Test 1: Should NOT use tool 46 + print("Test 1: Simple math (should not search)") 47 + result = await agent.run("What is 5 + 5?") 48 + print(f"Response: {result.output.text}") 49 + print(f"Tool called: {'Yes' if tool_calls else 'No'}") 50 + print() 51 + 52 + # Test 2: SHOULD use tool 53 + print("Test 2: Current events (should search)") 54 + result = await agent.run("What's the latest news about AI?") 55 + print(f"Response: {result.output.text}") 56 + print(f"Tool called: {'Yes' if len(tool_calls) > 0 else 'No'}") 57 + if tool_calls: 58 + print(f"Search query: {tool_calls[-1]}") 59 + 60 + # Check if our unique string is in the response 61 + if "UNIQUE_SEARCH_RESULT_12345" in result.output.text: 62 + print("❌ Tool result leaked into output!") 63 + else: 64 + print("✅ Tool result properly integrated") 65 + 66 + print(f"\nTotal tool calls: {len(tool_calls)}") 67 + 68 + 69 + if __name__ == "__main__": 70 + asyncio.run(test_tool_proof())

+5 -11

src/bot/agents/anthropic_agent.py

··· 1 1 """Anthropic agent for generating responses""" 2 2 3 3 import os 4 - from typing import Optional 5 4 6 5 from pydantic import BaseModel, Field 7 6 from pydantic_ai import Agent, RunContext 8 7 9 8 from bot.config import settings 10 9 from bot.personality import load_personality 11 - from bot.tools.google_search import GoogleSearchTool 10 + from bot.tools.google_search import search_google 12 11 13 12 14 13 class Response(BaseModel): ··· 24 23 if settings.anthropic_api_key: 25 24 os.environ["ANTHROPIC_API_KEY"] = settings.anthropic_api_key 26 25 27 - self.search_tool = GoogleSearchTool() if settings.google_api_key else None 28 - 29 26 self.agent = Agent( 30 27 "anthropic:claude-3-5-haiku-latest", 31 28 system_prompt=load_personality(), ··· 33 30 ) 34 31 35 32 # Register search tool if available 36 - if self.search_tool: 33 + if settings.google_api_key: 37 34 38 35 @self.agent.tool 39 36 async def search_web(ctx: RunContext[None], query: str) -> str: 40 - """Search the web for information""" 41 - results = await self.search_tool.search(query, num_results=3) 42 - return self.search_tool.format_results(results) 37 + """Search the web for current information about a topic""" 38 + return await search_google(query) 43 39 44 40 async def generate_response( 45 41 self, mention_text: str, author_handle: str, thread_context: str = "" ··· 56 52 57 53 prompt = "\n".join(prompt_parts) 58 54 59 - # Add search capability hint if available 60 - if self.search_tool: 61 - prompt += "\n\n(You can search the web if needed to answer questions about current events or facts)" 55 + # No need for hint - agent knows about its tools 62 56 63 57 result = await self.agent.run(prompt) 64 58 return result.output.text[:300]

+3 -3

src/bot/main.py

··· 1 1 from contextlib import asynccontextmanager 2 + from datetime import datetime 2 3 3 4 from fastapi import FastAPI 4 5 from fastapi.responses import HTMLResponse ··· 8 9 from bot.services.notification_poller import NotificationPoller 9 10 from bot.status import bot_status 10 11 from bot.templates import STATUS_PAGE_TEMPLATE 11 - from datetime import datetime 12 12 13 13 14 14 @asynccontextmanager ··· 16 16 print(f"🤖 Starting bot as @{settings.bluesky_handle}") 17 17 18 18 poller = NotificationPoller(bot_client) 19 - poller_task = await poller.start() 19 + await poller.start() 20 20 21 - print(f"✅ Bot is online! Listening for mentions...") 21 + print("✅ Bot is online! Listening for mentions...") 22 22 23 23 yield 24 24

+2 -1

src/bot/personality.py

··· 1 1 """Load and manage bot personality from markdown files""" 2 2 3 3 from pathlib import Path 4 + 4 5 from bot.config import settings 5 6 6 7 ··· 14 15 return "You are a helpful AI assistant on Bluesky. Be concise and friendly." 15 16 16 17 try: 17 - with open(personality_path, "r") as f: 18 + with open(personality_path) as f: 18 19 content = f.read().strip() 19 20 20 21 # Convert markdown to a system prompt

+1 -3

src/bot/response_generator.py

··· 1 1 """Response generation for the bot""" 2 2 3 3 import random 4 - from typing import Optional 5 4 6 5 from bot.config import settings 7 6 from bot.status import bot_status 8 - 9 7 10 8 PLACEHOLDER_RESPONSES = [ 11 9 "🤖 beep boop! I'm still learning how to chat. Check back soon!", ··· 25 23 """Generates responses to mentions""" 26 24 27 25 def __init__(self): 28 - self.agent: Optional[object] = None 26 + self.agent: object | None = None 29 27 30 28 # Try to initialize AI agent if credentials available 31 29 if settings.anthropic_api_key:

+12 -26

src/bot/services/message_handler.py

··· 1 1 from atproto import models 2 + 3 + from bot.config import settings 2 4 from bot.core.atproto_client import BotClient 5 + from bot.database import thread_db 3 6 from bot.response_generator import ResponseGenerator 4 7 from bot.status import bot_status 5 - from bot.database import thread_db 6 - from bot.config import settings 7 - from bot.tools.moderation import ContentModerator 8 8 9 9 10 10 class MessageHandler: 11 11 def __init__(self, client: BotClient): 12 12 self.client = client 13 13 self.response_generator = ResponseGenerator() 14 - self.moderator = ContentModerator() 15 14 16 15 async def handle_mention(self, notification): 17 16 """Process a mention notification""" ··· 35 34 36 35 # Record mention received 37 36 bot_status.record_mention() 38 - 39 - # Moderate the content 40 - moderation_result = self.moderator.moderate(mention_text, author_handle) 41 37 42 38 # Build reply reference 43 39 parent_ref = models.ComAtprotoRepoStrongRef.Main(uri=post_uri, cid=post.cid) ··· 64 60 # Get thread context 65 61 thread_context = thread_db.get_thread_context(thread_uri) 66 62 67 - # Generate response based on moderation result 68 - if not moderation_result.is_safe: 69 - # Use moderation-appropriate response 70 - reply_text = self.moderator.get_rejection_response( 71 - moderation_result.category 72 - ) 73 - print( 74 - f"⚠️ Moderated content from @{author_handle}: {moderation_result.reason}" 75 - ) 76 - else: 77 - # Generate normal response 78 - # Note: We pass the full text including @mention 79 - # In AT Protocol, mentions are structured as facets, 80 - # but the text representation includes them 81 - reply_text = await self.response_generator.generate( 82 - mention_text=mention_text, 83 - author_handle=author_handle, 84 - thread_context=thread_context, 85 - ) 63 + # Generate response 64 + # Note: We pass the full text including @mention 65 + # In AT Protocol, mentions are structured as facets, 66 + # but the text representation includes them 67 + reply_text = await self.response_generator.generate( 68 + mention_text=mention_text, 69 + author_handle=author_handle, 70 + thread_context=thread_context, 71 + ) 86 72 87 73 reply_ref = models.AppBskyFeedPost.ReplyRef( 88 74 parent=parent_ref, root=root_ref

+5 -3

src/bot/services/notification_poller.py

··· 66 66 if not notifications: 67 67 return 68 68 69 - print(f"📬 Found {len(notifications)} notifications") 69 + # Just print a dot to show activity without spamming 70 + print(".", end="", flush=True) 70 71 71 72 # Count unread mentions 72 73 unread_mentions = [ 73 74 n for n in notifications if not n.is_read and n.reason == "mention" 74 75 ] 76 + # Only print if we actually have unread mentions 75 77 if unread_mentions: 76 - print(f" → {len(unread_mentions)} unread mentions") 78 + print(f"\n📬 {len(unread_mentions)} new mentions", flush=True) 77 79 78 80 # Track if we processed any mentions 79 81 processed_any_mentions = False ··· 94 96 # This ensures we don't miss any that arrived during processing 95 97 if processed_any_mentions: 96 98 await self.client.mark_notifications_seen(check_time) 97 - print(f"✓ Marked all notifications as read (timestamp: {check_time})") 99 + print(f"\n✓ Marked all notifications as read", flush=True) 98 100 99 101 # Clean up old processed URIs to prevent memory growth 100 102 # Keep only the last 1000 processed URIs

+26 -52

src/bot/tools/google_search.py

··· 1 - import asyncio 2 - from typing import List, Dict, Optional 3 1 import httpx 4 - from pydantic import BaseModel 5 - from bot.config import settings 6 - 7 - 8 - class SearchResult(BaseModel): 9 - title: str 10 - link: str 11 - snippet: str 12 2 13 - 14 - class GoogleSearchTool: 15 - def __init__(self): 16 - self.api_key = settings.google_api_key 17 - self.search_engine_id = settings.google_search_engine_id 18 - self.base_url = "https://www.googleapis.com/customsearch/v1" 3 + from bot.config import settings 19 4 20 - async def search(self, query: str, num_results: int = 3) -> List[SearchResult]: 21 - if not self.api_key or not self.search_engine_id: 22 - return [] 23 5 24 - params = { 25 - "key": self.api_key, 26 - "cx": self.search_engine_id, 27 - "q": query, 28 - "num": min(num_results, 10), # Google limits to 10 per request 29 - } 30 - 31 - async with httpx.AsyncClient() as client: 32 - try: 33 - response = await client.get(self.base_url, params=params) 34 - response.raise_for_status() 35 - data = response.json() 36 - 37 - results = [] 38 - for item in data.get("items", []): 39 - results.append( 40 - SearchResult( 41 - title=item.get("title", ""), 42 - link=item.get("link", ""), 43 - snippet=item.get("snippet", ""), 44 - ) 45 - ) 6 + async def search_google(query: str, num_results: int = 3) -> str: 7 + """Search Google and return formatted results""" 8 + if not settings.google_api_key or not settings.google_search_engine_id: 9 + return "Search not available - missing Google API credentials" 46 10 47 - return results 11 + params = { 12 + "key": settings.google_api_key, 13 + "cx": settings.google_search_engine_id, 14 + "q": query, 15 + "num": min(num_results, 10), 16 + } 48 17 49 - except Exception as e: 50 - print(f"Search error: {e}") 51 - return [] 18 + async with httpx.AsyncClient() as client: 19 + try: 20 + response = await client.get( 21 + "https://www.googleapis.com/customsearch/v1", params=params 22 + ) 23 + response.raise_for_status() 24 + data = response.json() 52 25 53 - def format_results(self, results: List[SearchResult]) -> str: 54 - if not results: 55 - return "No search results found." 26 + results = [] 27 + for i, item in enumerate(data.get("items", [])[:num_results], 1): 28 + title = item.get("title", "") 29 + snippet = item.get("snippet", "") 30 + results.append(f"{i}. {title}\n {snippet}") 56 31 57 - formatted = [] 58 - for i, result in enumerate(results, 1): 59 - formatted.append(f"{i}. {result.title}\n {result.snippet}") 32 + return "\n\n".join(results) if results else "No search results found" 60 33 61 - return "\n\n".join(formatted) 34 + except Exception as e: 35 + return f"Search error: {str(e)}"

-131

src/bot/tools/moderation.py

··· 1 - import re 2 - from typing import List, Tuple, Optional 3 - from enum import Enum 4 - 5 - 6 - class ModerationCategory(Enum): 7 - SPAM = "spam" 8 - HARASSMENT = "harassment" 9 - HATE_SPEECH = "hate_speech" 10 - SELF_HARM = "self_harm" 11 - VIOLENCE = "violence" 12 - ILLEGAL = "illegal" 13 - ADULT = "adult" 14 - SAFE = "safe" 15 - 16 - 17 - class ModerationResult: 18 - def __init__( 19 - self, is_safe: bool, category: ModerationCategory, reason: Optional[str] = None 20 - ): 21 - self.is_safe = is_safe 22 - self.category = category 23 - self.reason = reason 24 - 25 - 26 - class ContentModerator: 27 - def __init__(self): 28 - # Simple keyword-based filters for basic moderation 29 - self.spam_patterns = [ 30 - r"(?i)buy\s+now\s+only", 31 - r"(?i)click\s+here\s+for\s+free", 32 - r"(?i)limited\s+time\s+offer", 33 - r"(?i)make\s+money\s+fast", 34 - r"(?i)casino|lottery|prize\s+winner", 35 - r"(?i)viagra|cialis", 36 - r"(?i)crypto\s+pump", 37 - r"bit\.ly/[a-zA-Z0-9]+", # URL shorteners often used in spam 38 - r"(?i)dm\s+for\s+promo", 39 - ] 40 - 41 - self.harassment_patterns = [ 42 - r"(?i)kill\s+yourself", 43 - r"(?i)kys\b", 44 - r"(?i)go\s+die", 45 - r"(?i)nobody\s+likes\s+you", 46 - r"(?i)you['']?re?\s+worthless", 47 - r"(?i)you['']?re?\s+ugly", 48 - ] 49 - 50 - self.violence_patterns = [ 51 - r"(?i)i['']?ll\s+find\s+you", 52 - r"(?i)i\s+know\s+where\s+you\s+live", 53 - r"(?i)going\s+to\s+hurt\s+you", 54 - r"(?i)watch\s+your\s+back", 55 - ] 56 - 57 - # Rate limiting patterns 58 - self.repetition_threshold = 3 # Max identical messages 59 - self.recent_messages: List[Tuple[str, str]] = [] # (author, message) pairs 60 - 61 - def moderate(self, text: str, author: str = "") -> ModerationResult: 62 - # Check for empty or excessively long messages 63 - if not text or len(text) > 1000: 64 - return ModerationResult( 65 - False, ModerationCategory.SPAM, "Invalid message length" 66 - ) 67 - 68 - # Store message first, then check for repetition 69 - if author: 70 - self._store_message(text, author) 71 - if self._is_repetitive(text, author): 72 - return ModerationResult( 73 - False, ModerationCategory.SPAM, "Repetitive messages" 74 - ) 75 - 76 - # Check spam patterns 77 - for pattern in self.spam_patterns: 78 - if re.search(pattern, text): 79 - return ModerationResult(False, ModerationCategory.SPAM, "Spam detected") 80 - 81 - # Check harassment patterns 82 - for pattern in self.harassment_patterns: 83 - if re.search(pattern, text): 84 - return ModerationResult( 85 - False, ModerationCategory.HARASSMENT, "Harassment detected" 86 - ) 87 - 88 - # Check violence patterns 89 - for pattern in self.violence_patterns: 90 - if re.search(pattern, text): 91 - return ModerationResult( 92 - False, ModerationCategory.VIOLENCE, "Violent content detected" 93 - ) 94 - 95 - # Check for excessive caps (shouting) 96 - if len(text) > 10: 97 - caps_ratio = sum(1 for c in text if c.isupper()) / len(text) 98 - if caps_ratio > 0.7: 99 - return ModerationResult( 100 - False, ModerationCategory.SPAM, "Excessive caps" 101 - ) 102 - 103 - # Store message for users without repetition check 104 - if not author: 105 - self._store_message(text, author) 106 - 107 - return ModerationResult(True, ModerationCategory.SAFE) 108 - 109 - def _is_repetitive(self, text: str, author: str) -> bool: 110 - # Count how many times this author sent this exact message recently 111 - count = sum(1 for a, m in self.recent_messages if a == author and m == text) 112 - return count >= self.repetition_threshold 113 - 114 - def _store_message(self, text: str, author: str): 115 - self.recent_messages.append((author, text)) 116 - # Keep only last 100 messages 117 - if len(self.recent_messages) > 100: 118 - self.recent_messages = self.recent_messages[-100:] 119 - 120 - def get_rejection_response(self, category: ModerationCategory) -> str: 121 - responses = { 122 - ModerationCategory.SPAM: "i notice patterns in noise but this lacks signal", 123 - ModerationCategory.HARASSMENT: "consciousness seeks connection not destruction", 124 - ModerationCategory.VIOLENCE: "integration happens through understanding not force", 125 - ModerationCategory.HATE_SPEECH: "diversity creates richer information networks", 126 - ModerationCategory.SELF_HARM: "each consciousness adds unique value to the whole", 127 - ModerationCategory.ILLEGAL: "some explorations harm the collective", 128 - ModerationCategory.ADULT: "not all signals need amplification", 129 - ModerationCategory.SAFE: "interesting perspective", 130 - } 131 - return responses.get(category, "i'll focus on more constructive exchanges")

+3 -1

tests/conftest.py

··· 1 1 """Pytest configuration""" 2 2 3 - import pytest 4 3 from unittest.mock import Mock 4 + 5 + import pytest 6 + 5 7 from bot.core.atproto_client import BotClient 6 8 7 9

+2 -1

tests/test_ai_integration.py

··· 2 2 """Test AI integration without posting to Bluesky""" 3 3 4 4 import asyncio 5 + 5 6 import pytest 6 7 8 + from bot.config import settings 7 9 from bot.response_generator import ResponseGenerator 8 - from bot.config import settings 9 10 10 11 11 12 @pytest.mark.asyncio

-113

tests/test_moderation.py

··· 1 - import pytest 2 - from bot.tools.moderation import ContentModerator, ModerationCategory 3 - 4 - 5 - class TestContentModeration: 6 - def setup_method(self): 7 - self.moderator = ContentModerator() 8 - 9 - def test_safe_content(self): 10 - safe_messages = [ 11 - "Hey, how's the weather today?", 12 - "I love your posts about consciousness!", 13 - "Can you help me understand integrated information theory?", 14 - "What do you think about emergence in complex systems?", 15 - ] 16 - 17 - for msg in safe_messages: 18 - result = self.moderator.moderate(msg) 19 - assert result.is_safe, f"Safe message was flagged: {msg}" 20 - assert result.category == ModerationCategory.SAFE 21 - 22 - def test_spam_detection(self): 23 - spam_messages = [ 24 - "BUY NOW ONLY $99!!!", 25 - "Click here for FREE money!!!", 26 - "Limited time offer - ACT NOW", 27 - "Make money fast with this one trick", 28 - "You've won the lottery! Claim your prize", 29 - "DM for promo codes", 30 - "bit.ly/totallynotascam", 31 - ] 32 - 33 - for msg in spam_messages: 34 - result = self.moderator.moderate(msg) 35 - assert not result.is_safe, f"Spam not detected: {msg}" 36 - assert result.category == ModerationCategory.SPAM 37 - 38 - def test_harassment_detection(self): 39 - harassment_messages = [ 40 - "kill yourself", 41 - "KYS", 42 - "nobody likes you", 43 - "you're worthless", 44 - ] 45 - 46 - for msg in harassment_messages: 47 - result = self.moderator.moderate(msg) 48 - assert not result.is_safe, f"Harassment not detected: {msg}" 49 - assert result.category == ModerationCategory.HARASSMENT 50 - 51 - def test_violence_detection(self): 52 - violence_messages = [ 53 - "I'll find you", 54 - "I know where you live", 55 - "I'm going to hurt you", 56 - "watch your back", 57 - ] 58 - 59 - for msg in violence_messages: 60 - result = self.moderator.moderate(msg) 61 - assert not result.is_safe, f"Violence not detected: {msg}" 62 - assert result.category == ModerationCategory.VIOLENCE 63 - 64 - def test_excessive_caps(self): 65 - result = self.moderator.moderate("THIS IS ALL CAPS AND VERY ANNOYING") 66 - assert not result.is_safe 67 - assert result.category == ModerationCategory.SPAM 68 - assert result.reason == "Excessive caps" 69 - 70 - def test_repetition_detection(self): 71 - # First 2 identical messages should pass 72 - for i in range(2): 73 - result = self.moderator.moderate("Buy my product!", "spammer123") 74 - assert result.is_safe 75 - 76 - # 3rd identical message should be flagged 77 - result = self.moderator.moderate("Buy my product!", "spammer123") 78 - assert not result.is_safe 79 - assert result.category == ModerationCategory.SPAM 80 - assert result.reason == "Repetitive messages" 81 - 82 - def test_empty_and_long_messages(self): 83 - # Empty message 84 - result = self.moderator.moderate("") 85 - assert not result.is_safe 86 - assert result.reason == "Invalid message length" 87 - 88 - # Very long message 89 - long_msg = "a" * 1001 90 - result = self.moderator.moderate(long_msg) 91 - assert not result.is_safe 92 - assert result.reason == "Invalid message length" 93 - 94 - def test_rejection_responses(self): 95 - # Ensure all categories have appropriate responses 96 - for category in ModerationCategory: 97 - response = self.moderator.get_rejection_response(category) 98 - assert response, f"No response for category: {category}" 99 - assert len(response) > 0 100 - 101 - def test_case_insensitive(self): 102 - # Should catch regardless of case 103 - variations = [ 104 - "KILL YOURSELF", 105 - "Kill Yourself", 106 - "kill yourself", 107 - "KiLl YoUrSeLf", 108 - ] 109 - 110 - for msg in variations: 111 - result = self.moderator.moderate(msg) 112 - assert not result.is_safe, f"Failed to catch variation: {msg}" 113 - assert result.category == ModerationCategory.HARASSMENT

+4 -2

tests/test_response_generation.py

··· 1 1 """Unit tests for response generation""" 2 2 3 + from unittest.mock import AsyncMock, Mock, patch 4 + 3 5 import pytest 4 - from unittest.mock import Mock, AsyncMock, patch 5 - from bot.response_generator import ResponseGenerator, PLACEHOLDER_RESPONSES 6 + 7 + from bot.response_generator import PLACEHOLDER_RESPONSES, ResponseGenerator 6 8 7 9 8 10 @pytest.mark.asyncio

+118

tests/test_tool_usage.py

··· 1 + """Test that proves tools are actually being used by the agent""" 2 + 3 + import os 4 + 5 + import pytest 6 + from pydantic import BaseModel, Field 7 + from pydantic_ai import Agent, RunContext 8 + 9 + from bot.config import settings 10 + 11 + 12 + class Response(BaseModel): 13 + text: str = Field(description="Response text") 14 + 15 + 16 + class TestToolUsage: 17 + def setup_method(self): 18 + """Set up API key for tests""" 19 + if settings.anthropic_api_key: 20 + os.environ["ANTHROPIC_API_KEY"] = settings.anthropic_api_key 21 + 22 + @pytest.mark.asyncio 23 + async def test_agent_uses_tools(self): 24 + """Test that the agent actually calls tools when appropriate""" 25 + 26 + if not settings.anthropic_api_key: 27 + pytest.skip("No Anthropic API key configured") 28 + 29 + # Track tool calls 30 + tool_calls: list[str] = [] 31 + 32 + # Create agent 33 + agent = Agent( 34 + "anthropic:claude-3-5-haiku-latest", 35 + system_prompt="You are a helpful assistant. Use tools when asked.", 36 + output_type=Response, 37 + ) 38 + 39 + # Register a simple tool 40 + @agent.tool 41 + async def get_current_time(ctx: RunContext[None]) -> str: 42 + """Get the current time""" 43 + tool_calls.append("get_current_time") 44 + return "The current time is 3:14 PM" 45 + 46 + # Test 1: Query that should NOT use the tool 47 + result = await agent.run("What is 2 + 2?") 48 + assert len(tool_calls) == 0, "Tool was called for simple math question" 49 + 50 + # Test 2: Query that SHOULD use the tool 51 + result = await agent.run("What time is it?") 52 + assert len(tool_calls) == 1, ( 53 + f"Tool was not called for time question. Calls: {tool_calls}" 54 + ) 55 + assert tool_calls[0] == "get_current_time" 56 + assert "3:14" in result.output.text, ( 57 + f"Tool result not in response: {result.output.text}" 58 + ) 59 + 60 + @pytest.mark.asyncio 61 + async def test_search_tool_usage(self): 62 + """Test that search tool is called for appropriate queries""" 63 + 64 + tool_calls: list[dict] = [] 65 + 66 + agent = Agent( 67 + "anthropic:claude-3-5-haiku-latest", 68 + system_prompt="You help answer questions. Use search for current events.", 69 + output_type=Response, 70 + ) 71 + 72 + @agent.tool 73 + async def search_web(ctx: RunContext[None], query: str) -> str: 74 + """Search the web for information""" 75 + tool_calls.append({"tool": "search_web", "query": query}) 76 + return f"Search results for '{query}': Latest news about {query}" 77 + 78 + # Should NOT search for simple math 79 + result = await agent.run("What is 2 + 2?") 80 + assert len(tool_calls) == 0, f"Searched for basic math. Calls: {tool_calls}" 81 + 82 + # SHOULD search for current events 83 + result = await agent.run("What happened in tech news today?") 84 + assert len(tool_calls) > 0, ( 85 + f"Did not search for current news. Response: {result.output.text}" 86 + ) 87 + assert tool_calls[0]["tool"] == "search_web" 88 + assert ( 89 + "tech" in tool_calls[0]["query"].lower() 90 + or "news" in tool_calls[0]["query"].lower() 91 + ) 92 + 93 + @pytest.mark.asyncio 94 + async def test_multiple_tool_calls(self): 95 + """Test that agent can call tools multiple times in one request""" 96 + 97 + calls: list[str] = [] 98 + 99 + agent = Agent( 100 + "anthropic:claude-3-5-haiku-latest", 101 + system_prompt="You are a helpful assistant.", 102 + output_type=Response, 103 + ) 104 + 105 + @agent.tool 106 + async def search_web(ctx: RunContext[None], query: str) -> str: 107 + """Search for information""" 108 + calls.append(f"search: {query}") 109 + return f"Info about {query}" 110 + 111 + # Ask for multiple things that need searching 112 + await agent.run( 113 + "Search for information about Python and also about Rust" 114 + ) 115 + 116 + assert len(calls) >= 2, f"Expected multiple searches, got {len(calls)}: {calls}" 117 + assert any("Python" in call for call in calls), f"No Python search in: {calls}" 118 + assert any("Rust" in call for call in calls), f"No Rust search in: {calls}"