a digital entity named phi that roams bsky
at init 116 lines 4.1 kB view raw
"""Test that proves tools are actually being used by the agent"""

import os

import pytest
from pydantic import BaseModel, Field
from pydantic_ai import Agent, RunContext

from bot.config import settings

# Model identifier shared by every agent built in this module.
MODEL_NAME = "anthropic:claude-3-5-haiku-latest"


# Structured output type requested from each agent.
# NOTE: intentionally documented with a comment rather than a docstring, so the
# schema derived from this model stays exactly as it was.
class Response(BaseModel):
    text: str = Field(description="Response text")


class TestToolUsage:
    """Checks that an agent calls its registered tools only when the prompt needs them.

    Each test registers a tool that records its own invocation in a local list,
    runs the agent on one or more prompts, and then asserts on that list.
    """

    def setup_method(self):
        """Set up API key for tests, skipping when no key is available.

        The key from ``settings`` is exported as ``ANTHROPIC_API_KEY``. When
        ``settings`` has no key and the variable is not already present in the
        environment, the test is skipped instead of failing, so all tests in
        this class behave the same way on a machine without credentials.
        """
        if settings.anthropic_api_key:
            os.environ["ANTHROPIC_API_KEY"] = settings.anthropic_api_key
        elif not os.environ.get("ANTHROPIC_API_KEY"):
            pytest.skip("No Anthropic API key configured")

    @pytest.mark.asyncio
    async def test_agent_uses_tools(self):
        """Test that the agent actually calls tools when appropriate"""

        # Track tool calls
        tool_calls: list[str] = []

        # Create agent
        agent = Agent(
            MODEL_NAME,
            system_prompt="You are a helpful assistant. Use tools when asked.",
            output_type=Response,
        )

        # Register a simple tool. Its docstring is kept verbatim because it is
        # part of what the agent is given to describe the tool.
        @agent.tool
        async def get_current_time(ctx: RunContext[None]) -> str:
            """Get the current time"""
            tool_calls.append("get_current_time")
            return "The current time is 3:14 PM"

        # Test 1: Query that should NOT use the tool
        await agent.run("What is 2 + 2?")
        assert len(tool_calls) == 0, "Tool was called for simple math question"

        # Test 2: Query that SHOULD use the tool
        result = await agent.run("What time is it?")
        assert len(tool_calls) == 1, (
            f"Tool was not called for time question. Calls: {tool_calls}"
        )
        assert tool_calls[0] == "get_current_time"
        # The canned tool output must surface in the final structured answer.
        assert "3:14" in result.output.text, (
            f"Tool result not in response: {result.output.text}"
        )

    @pytest.mark.asyncio
    async def test_search_tool_usage(self):
        """Test that search tool is called for appropriate queries"""

        # One record per invocation: the tool name and the query it received.
        tool_calls: list[dict] = []

        agent = Agent(
            MODEL_NAME,
            system_prompt="You help answer questions. Use search for current events.",
            output_type=Response,
        )

        @agent.tool
        async def search_web(ctx: RunContext[None], query: str) -> str:
            """Search the web for information"""
            tool_calls.append({"tool": "search_web", "query": query})
            return f"Search results for '{query}': Latest news about {query}"

        # Should NOT search for simple math
        await agent.run("What is 2 + 2?")
        assert len(tool_calls) == 0, f"Searched for basic math. Calls: {tool_calls}"

        # SHOULD search for current events
        result = await agent.run("What happened in tech news today?")
        assert len(tool_calls) > 0, (
            f"Did not search for current news. Response: {result.output.text}"
        )
        assert tool_calls[0]["tool"] == "search_web"
        # Compare case-insensitively: the wording of the query is up to the model.
        first_query = tool_calls[0]["query"].lower()
        assert "tech" in first_query or "news" in first_query

    @pytest.mark.asyncio
    async def test_multiple_tool_calls(self):
        """Test that agent can call tools multiple times in one request"""

        # One "search: <query>" entry per invocation of the tool.
        calls: list[str] = []

        agent = Agent(
            MODEL_NAME,
            system_prompt="You are a helpful assistant.",
            output_type=Response,
        )

        @agent.tool
        async def search_web(ctx: RunContext[None], query: str) -> str:
            """Search for information"""
            calls.append(f"search: {query}")
            return f"Info about {query}"

        # Ask for multiple things that need searching
        await agent.run("Search for information about Python and also about Rust")

        assert len(calls) >= 2, f"Expected multiple searches, got {len(calls)}: {calls}"
        # Match case-insensitively, as test_search_tool_usage does, so a query
        # such as "python tutorials" still counts as the Python search.
        lowered = [call.lower() for call in calls]
        assert any("python" in call for call in lowered), f"No Python search in: {calls}"
        assert any("rust" in call for call in lowered), f"No Rust search in: {calls}"