tests/test_tool_usage.py at init

zzstoatzz.io / bot
a digital entity named phi that roams bsky
fork atom
bot / tests / test_tool_usage.py
at init 116 lines 4.1 kB view raw
wrap content
zzstoatzz.io Add thread reply handling with intelligent ignore capability 7 months ago
965e4f35
  1"""Test that proves tools are actually being used by the agent"""
  2
  3import os
  4
  5import pytest
  6from pydantic import BaseModel, Field
  7from pydantic_ai import Agent, RunContext
  8
  9from bot.config import settings
 10
 11
# Structured output model handed to each Agent in this file via
# `output_type=Response`; the tests read the reply through `result.output.text`.
# NOTE(review): documented with comments rather than a class docstring, since a
# docstring would presumably surface in the generated schema — confirm.
class Response(BaseModel):
    # The agent's reply text.
    text: str = Field(description="Response text")
 14
 15
 16class TestToolUsage:
 17    def setup_method(self):
 18        """Set up API key for tests"""
 19        if settings.anthropic_api_key:
 20            os.environ["ANTHROPIC_API_KEY"] = settings.anthropic_api_key
 21
 22    @pytest.mark.asyncio
 23    async def test_agent_uses_tools(self):
 24        """Test that the agent actually calls tools when appropriate"""
 25
 26        if not settings.anthropic_api_key:
 27            pytest.skip("No Anthropic API key configured")
 28
 29        # Track tool calls
 30        tool_calls: list[str] = []
 31
 32        # Create agent
 33        agent = Agent(
 34            "anthropic:claude-3-5-haiku-latest",
 35            system_prompt="You are a helpful assistant. Use tools when asked.",
 36            output_type=Response,
 37        )
 38
 39        # Register a simple tool
 40        @agent.tool
 41        async def get_current_time(ctx: RunContext[None]) -> str:
 42            """Get the current time"""
 43            tool_calls.append("get_current_time")
 44            return "The current time is 3:14 PM"
 45
 46        # Test 1: Query that should NOT use the tool
 47        result = await agent.run("What is 2 + 2?")
 48        assert len(tool_calls) == 0, "Tool was called for simple math question"
 49
 50        # Test 2: Query that SHOULD use the tool
 51        result = await agent.run("What time is it?")
 52        assert len(tool_calls) == 1, (
 53            f"Tool was not called for time question. Calls: {tool_calls}"
 54        )
 55        assert tool_calls[0] == "get_current_time"
 56        assert "3:14" in result.output.text, (
 57            f"Tool result not in response: {result.output.text}"
 58        )
 59
 60    @pytest.mark.asyncio
 61    async def test_search_tool_usage(self):
 62        """Test that search tool is called for appropriate queries"""
 63
 64        tool_calls: list[dict] = []
 65
 66        agent = Agent(
 67            "anthropic:claude-3-5-haiku-latest",
 68            system_prompt="You help answer questions. Use search for current events.",
 69            output_type=Response,
 70        )
 71
 72        @agent.tool
 73        async def search_web(ctx: RunContext[None], query: str) -> str:
 74            """Search the web for information"""
 75            tool_calls.append({"tool": "search_web", "query": query})
 76            return f"Search results for '{query}': Latest news about {query}"
 77
 78        # Should NOT search for simple math
 79        result = await agent.run("What is 2 + 2?")
 80        assert len(tool_calls) == 0, f"Searched for basic math. Calls: {tool_calls}"
 81
 82        # SHOULD search for current events
 83        result = await agent.run("What happened in tech news today?")
 84        assert len(tool_calls) > 0, (
 85            f"Did not search for current news. Response: {result.output.text}"
 86        )
 87        assert tool_calls[0]["tool"] == "search_web"
 88        assert (
 89            "tech" in tool_calls[0]["query"].lower()
 90            or "news" in tool_calls[0]["query"].lower()
 91        )
 92
 93    @pytest.mark.asyncio
 94    async def test_multiple_tool_calls(self):
 95        """Test that agent can call tools multiple times in one request"""
 96
 97        calls: list[str] = []
 98
 99        agent = Agent(
100            "anthropic:claude-3-5-haiku-latest",
101            system_prompt="You are a helpful assistant.",
102            output_type=Response,
103        )
104
105        @agent.tool
106        async def search_web(ctx: RunContext[None], query: str) -> str:
107            """Search for information"""
108            calls.append(f"search: {query}")
109            return f"Info about {query}"
110
111        # Ask for multiple things that need searching
112        await agent.run("Search for information about Python and also about Rust")
113
114        assert len(calls) >= 2, f"Expected multiple searches, got {len(calls)}: {calls}"
115        assert any("Python" in call for call in calls), f"No Python search in: {calls}"
116        assert any("Rust" in call for call in calls), f"No Rust search in: {calls}"