tests/test_cogitate_coder.py at main

solpbc.org / solstone
fork atom
personal memory agent
fork atom
solstone / tests / test_cogitate_coder.py
at main 229 lines 9.0 kB view raw
wrap content
Jer Miller refactor: rename muse → talent project-wide 23hrs ago
cb19a905
  1# SPDX-License-Identifier: AGPL-3.0-only
  2# Copyright (c) 2026 sol pbc
  3
  4"""Tests for cogitate coder mode: write flag, coder agent."""
  5
  6import asyncio
  7import importlib
  8from unittest.mock import AsyncMock, patch
  9
 10# ---------------------------------------------------------------------------
 11# Write flag — Anthropic provider
 12# ---------------------------------------------------------------------------
 13
 14
 15class TestAnthropicWriteFlag:
 16    """Verify --allowedTools is controlled by config write flag."""
 17
 18    def _provider(self):
 19        return importlib.import_module("think.providers.anthropic")
 20
 21    @patch("think.providers.anthropic.check_cli_binary")
 22    @patch("think.providers.anthropic.CLIRunner")
 23    def test_no_write_restricts_tools(self, mock_runner_cls, mock_check):
 24        """Without write flag, --allowedTools restricts to sol."""
 25        provider = self._provider()
 26        mock_instance = AsyncMock()
 27        mock_instance.run = AsyncMock(return_value="result")
 28        mock_instance.cli_session_id = None
 29        mock_runner_cls.return_value = mock_instance
 30
 31        config = {"prompt": "test", "model": "claude-sonnet-4-20250514"}
 32        asyncio.run(provider.run_cogitate(config))
 33
 34        cmd = mock_runner_cls.call_args.kwargs["cmd"]
 35        assert "--allowedTools" in cmd
 36        assert "Bash(sol *)" in cmd
 37
 38    @patch("think.providers.anthropic.check_cli_binary")
 39    @patch("think.providers.anthropic.CLIRunner")
 40    def test_write_true_grants_full_access(self, mock_runner_cls, mock_check):
 41        """With write=True, --allowedTools is omitted for full tool access."""
 42        provider = self._provider()
 43        mock_instance = AsyncMock()
 44        mock_instance.run = AsyncMock(return_value="result")
 45        mock_instance.cli_session_id = None
 46        mock_runner_cls.return_value = mock_instance
 47
 48        config = {"prompt": "test", "model": "claude-sonnet-4-20250514", "write": True}
 49        asyncio.run(provider.run_cogitate(config))
 50
 51        cmd = mock_runner_cls.call_args.kwargs["cmd"]
 52        assert "--allowedTools" not in cmd
 53
 54    @patch("think.providers.anthropic.check_cli_binary")
 55    @patch("think.providers.anthropic.CLIRunner")
 56    def test_write_false_restricts_tools(self, mock_runner_cls, mock_check):
 57        """Explicit write=False keeps restriction."""
 58        provider = self._provider()
 59        mock_instance = AsyncMock()
 60        mock_instance.run = AsyncMock(return_value="result")
 61        mock_instance.cli_session_id = None
 62        mock_runner_cls.return_value = mock_instance
 63
 64        config = {"prompt": "test", "model": "claude-sonnet-4-20250514", "write": False}
 65        asyncio.run(provider.run_cogitate(config))
 66
 67        cmd = mock_runner_cls.call_args.kwargs["cmd"]
 68        assert "--allowedTools" in cmd
 69
 70
 71# ---------------------------------------------------------------------------
 72# Write flag — OpenAI provider
 73# ---------------------------------------------------------------------------
 74
 75
 76class TestOpenAIWriteFlag:
 77    """Verify sandbox mode is controlled by config write flag."""
 78
 79    def _provider(self):
 80        return importlib.import_module("think.providers.openai")
 81
 82    @patch("think.providers.openai.CLIRunner")
 83    def test_no_write_uses_readonly_sandbox(self, mock_runner_cls):
 84        """Without write flag, sandbox is read-only."""
 85        provider = self._provider()
 86        mock_instance = AsyncMock()
 87        mock_instance.run = AsyncMock(return_value="result")
 88        mock_instance.cli_session_id = None
 89        mock_runner_cls.return_value = mock_instance
 90
 91        config = {"prompt": "test", "model": "gpt-5.2"}
 92        asyncio.run(provider.run_cogitate(config))
 93
 94        cmd = mock_runner_cls.call_args.kwargs["cmd"]
 95        # Find the -s flag and its value
 96        s_idx = cmd.index("-s")
 97        assert cmd[s_idx + 1] == "read-only"
 98
 99    @patch("think.providers.openai.CLIRunner")
100    def test_write_true_uses_write_sandbox(self, mock_runner_cls):
101        """With write=True, sandbox is write."""
102        provider = self._provider()
103        mock_instance = AsyncMock()
104        mock_instance.run = AsyncMock(return_value="result")
105        mock_instance.cli_session_id = None
106        mock_runner_cls.return_value = mock_instance
107
108        config = {"prompt": "test", "model": "gpt-5.2", "write": True}
109        asyncio.run(provider.run_cogitate(config))
110
111        cmd = mock_runner_cls.call_args.kwargs["cmd"]
112        s_idx = cmd.index("-s")
113        assert cmd[s_idx + 1] == "workspace-write"
114
115    @patch("think.providers.openai.CLIRunner")
116    def test_write_true_with_session_resume(self, mock_runner_cls):
117        """Write flag works correctly with session resume path."""
118        provider = self._provider()
119        mock_instance = AsyncMock()
120        mock_instance.run = AsyncMock(return_value="result")
121        mock_instance.cli_session_id = None
122        mock_runner_cls.return_value = mock_instance
123
124        config = {
125            "prompt": "test",
126            "model": "gpt-5.2",
127            "write": True,
128            "session_id": "sess-123",
129        }
130        asyncio.run(provider.run_cogitate(config))
131
132        cmd = mock_runner_cls.call_args.kwargs["cmd"]
133        s_idx = cmd.index("-s")
134        assert cmd[s_idx + 1] == "workspace-write"
135        assert "resume" in cmd
136
137
138# ---------------------------------------------------------------------------
139# Write flag — Google provider
140# ---------------------------------------------------------------------------
141
142
class TestGoogleWriteFlag:
    """Check that the config ``write`` flag decides whether --allowed-tools is sent."""

    def _provider(self):
        """Import and return the Google provider module."""
        return importlib.import_module("think.providers.google")

    def _captured_cmd(self, runner_cls, **extra):
        """Run cogitate against a stubbed CLIRunner and return its ``cmd`` kwarg.

        ``extra`` entries are appended to the base prompt/model config.
        """
        runner = AsyncMock()
        runner.run = AsyncMock(return_value="result")
        runner.cli_session_id = None
        runner_cls.return_value = runner

        config = {"prompt": "test", "model": "gemini-2.5-flash", **extra}
        asyncio.run(self._provider().run_cogitate(config))
        return runner_cls.call_args.kwargs["cmd"]

    @patch("think.providers.google.CLIRunner")
    def test_no_write_restricts_tools(self, mock_runner_cls):
        """No write key in config: --allowed-tools is present and limited to sol."""
        argv = self._captured_cmd(mock_runner_cls)

        assert "--allowed-tools" in argv
        assert "run_shell_command(sol)" in argv

    @patch("think.providers.google.CLIRunner")
    def test_write_true_grants_full_access(self, mock_runner_cls):
        """write=True: --allowed-tools is left off the command."""
        argv = self._captured_cmd(mock_runner_cls, write=True)

        assert "--allowed-tools" not in argv
179
180
181# ---------------------------------------------------------------------------
182# talent/coder.md existence and frontmatter
183# ---------------------------------------------------------------------------
184
185
class TestCoderAgent:
    """Check that talent/coder.md is present and carries the expected frontmatter."""

    def _talent_dir(self):
        """Return the ``talent`` directory two levels up from this test file."""
        from pathlib import Path

        return Path(__file__).parent.parent / "talent"

    def _coder_path(self):
        """Return the path to talent/coder.md."""
        return self._talent_dir() / "coder.md"

    def test_coder_md_exists(self):
        """talent/coder.md has to be present in the repo."""
        assert self._coder_path().exists(), "talent/coder.md not found"

    def test_coder_frontmatter(self):
        """coder.md declares type cogitate, write true, title Coder, a description."""
        import frontmatter

        meta = frontmatter.load(self._coder_path()).metadata

        assert meta.get("type") == "cogitate"
        assert meta.get("write") is True
        assert meta.get("title") == "Coder"
        assert "description" in meta

    def test_coder_references_coding_skill(self):
        """coder.md points at the coding skill rather than inlining guidelines."""
        text = self._coder_path().read_text(encoding="utf-8")

        # The agent file should defer to the coding skill, not restate it.
        assert "coding" in text.lower()
        assert "single source of truth" in text

        # The skill itself, and each of its reference files, must be on disk.
        skill_dir = self._talent_dir() / "coding"
        assert (skill_dir / "SKILL.md").exists(), "talent/coding/SKILL.md not found"

        reference_dir = skill_dir / "reference"
        for filename in (
            "coding-standards.md",
            "project-structure.md",
            "testing.md",
            "environment.md",
        ):
            assert (reference_dir / filename).exists()