# phi: a digital entity that roams bsky (Bluesky).
1"""Test that proves tools are actually being used by the agent"""
2
3import os
4
5import pytest
6from pydantic import BaseModel, Field
7from pydantic_ai import Agent, RunContext
8
9from bot.config import settings
10
11
# Structured output schema handed to the agents in this module as
# ``output_type``.
# NOTE: documented with comments instead of a class docstring on purpose --
# pydantic can surface a model docstring in the generated schema, which
# would change what is sent to the model (confirm against pydantic docs).
class Response(BaseModel):
    # Required field; the ellipsis marks "no default" explicitly.
    text: str = Field(..., description="Response text")
14
15
class TestToolUsage:
    """Live-model tests checking that an agent really calls its tools.

    Each test builds an ``Agent``, registers a tool that records its own
    invocations in a local list, runs one or more prompts, and asserts on
    what was recorded. Every test needs an Anthropic API key, so the whole
    class is skipped (in ``setup_method``) when none is configured.
    """

    # Single source of truth for the model used by every test in this class.
    MODEL_NAME = "anthropic:claude-3-5-haiku-latest"

    def setup_method(self):
        """Export the configured API key, or skip the test when it is missing.

        Skipping here (rather than inside one test) keeps all tests in the
        class consistent: none of them attempts a live call without a key.
        """
        if not settings.anthropic_api_key:
            pytest.skip("No Anthropic API key configured")
        os.environ["ANTHROPIC_API_KEY"] = settings.anthropic_api_key

    @pytest.mark.asyncio
    async def test_agent_uses_tools(self):
        """Tool is ignored for plain math and called once for a time query."""

        # Names of tools invoked, appended by the tool itself.
        tool_calls: list[str] = []

        agent = Agent(
            self.MODEL_NAME,
            system_prompt="You are a helpful assistant. Use tools when asked.",
            output_type=Response,
        )

        # NOTE(review): the tool docstring below is kept verbatim on purpose;
        # pydantic_ai is understood to forward it to the model as the tool
        # description -- confirm against the library docs before editing it.
        @agent.tool
        async def get_current_time(ctx: RunContext[None]) -> str:
            """Get the current time"""
            tool_calls.append("get_current_time")
            return "The current time is 3:14 PM"

        # Test 1: a query that should NOT use the tool.
        await agent.run("What is 2 + 2?")
        assert not tool_calls, "Tool was called for simple math question"

        # Test 2: a query that SHOULD use the tool.
        result = await agent.run("What time is it?")
        assert len(tool_calls) == 1, (
            f"Tool was not called for time question. Calls: {tool_calls}"
        )
        assert tool_calls[0] == "get_current_time"
        # The canned tool output must make it into the final answer.
        assert "3:14" in result.output.text, (
            f"Tool result not in response: {result.output.text}"
        )

    @pytest.mark.asyncio
    async def test_search_tool_usage(self):
        """Search tool is skipped for math and used for a current-events query."""

        # One record per invocation: which tool ran and with what query.
        tool_calls: list[dict] = []

        agent = Agent(
            self.MODEL_NAME,
            system_prompt="You help answer questions. Use search for current events.",
            output_type=Response,
        )

        # Tool docstring kept verbatim (see note in test_agent_uses_tools).
        @agent.tool
        async def search_web(ctx: RunContext[None], query: str) -> str:
            """Search the web for information"""
            tool_calls.append({"tool": "search_web", "query": query})
            return f"Search results for '{query}': Latest news about {query}"

        # Should NOT search for simple math.
        await agent.run("What is 2 + 2?")
        assert not tool_calls, f"Searched for basic math. Calls: {tool_calls}"

        # SHOULD search for current events.
        result = await agent.run("What happened in tech news today?")
        assert tool_calls, (
            f"Did not search for current news. Response: {result.output.text}"
        )
        first_call = tool_calls[0]
        assert first_call["tool"] == "search_web"
        # The model picks the wording of the query, so match loosely.
        query_text = first_call["query"].lower()
        assert "tech" in query_text or "news" in query_text

    @pytest.mark.asyncio
    async def test_multiple_tool_calls(self):
        """Agent issues at least two searches when asked about two topics."""

        # One "search: <query>" entry per tool invocation.
        calls: list[str] = []

        agent = Agent(
            self.MODEL_NAME,
            system_prompt="You are a helpful assistant.",
            output_type=Response,
        )

        # Tool docstring kept verbatim (see note in test_agent_uses_tools).
        @agent.tool
        async def search_web(ctx: RunContext[None], query: str) -> str:
            """Search for information"""
            calls.append(f"search: {query}")
            return f"Info about {query}"

        # Ask for multiple things that need searching.
        await agent.run("Search for information about Python and also about Rust")

        assert len(calls) >= 2, f"Expected multiple searches, got {len(calls)}: {calls}"
        # Compare case-insensitively: the model chooses the query casing,
        # matching the lenient check used in test_search_tool_usage.
        lowered = [call.lower() for call in calls]
        assert any("python" in call for call in lowered), f"No Python search in: {calls}"
        assert any("rust" in call for call in lowered), f"No Rust search in: {calls}"