personal memory agent
at main 326 lines 11 kB view raw
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright (c) 2026 sol pbc

"""Integration test for OpenAI provider with real API calls."""

import json
import os
import subprocess
from pathlib import Path

import pytest
from dotenv import load_dotenv

from tests.integration.conftest import require_cli_tool
from think.models import GPT_5


def get_fixtures_env():
    """Load the tests/fixtures/.env file and return the environment.

    Returns:
        Tuple of ``(fixtures_env_path, api_key, journal_path)``.  All three
        are ``None`` when the fixtures .env file does not exist; the latter
        two may individually be ``None`` when the variable is unset.
    """
    fixtures_env = Path(__file__).parent.parent / "fixtures" / ".env"
    if not fixtures_env.exists():
        return None, None, None

    # Load the env file (override=True so fixture values win over stale
    # values already present in the shell environment)
    load_dotenv(fixtures_env, override=True)

    api_key = os.getenv("OPENAI_API_KEY")
    journal_path = os.getenv("_SOLSTONE_JOURNAL_OVERRIDE")

    return fixtures_env, api_key, journal_path


def _env_or_skip():
    """Return a subprocess environment with API credentials, or skip.

    Consolidates the guard boilerplate shared by the CLI-driven tests:
    skips the calling test when the fixtures .env file or either required
    variable is missing, otherwise returns a copy of ``os.environ`` with
    the journal override and API key applied.
    """
    fixtures_env, api_key, journal_path = get_fixtures_env()

    if not fixtures_env:
        pytest.skip("tests/fixtures/.env not found")

    if not api_key:
        pytest.skip("OPENAI_API_KEY not found in tests/fixtures/.env file")

    if not journal_path:
        pytest.skip("_SOLSTONE_JOURNAL_OVERRIDE not found in tests/fixtures/.env file")

    env = os.environ.copy()
    env["_SOLSTONE_JOURNAL_OVERRIDE"] = journal_path
    env["OPENAI_API_KEY"] = api_key
    return env


def _run_provider_check(ndjson_input, env, timeout):
    """Run ``sol providers check`` with *ndjson_input* on stdin.

    Returns the completed :class:`subprocess.CompletedProcess` with text
    stdout/stderr captured.
    """
    return subprocess.run(
        ["sol", "providers", "check"],
        env=env,
        input=ndjson_input,
        capture_output=True,
        text=True,
        timeout=timeout,
    )


def _parse_events(stdout):
    """Parse JSONL *stdout* into a list of event dicts.

    Fails the calling test with a readable message on malformed JSON
    instead of raising a bare ``json.JSONDecodeError``.
    """
    events = []
    for line in stdout.strip().split("\n"):
        if line:
            try:
                events.append(json.loads(line))
            except json.JSONDecodeError as e:
                pytest.fail(f"Failed to parse JSON line: {line}\nError: {e}")
    return events


@pytest.mark.integration
@pytest.mark.requires_api
def test_openai_provider_basic():
    """Test OpenAI provider with basic prompt via CLI."""
    require_cli_tool("OpenAI", "codex")
    env = _env_or_skip()

    # Create NDJSON input (no tool config)
    ndjson_input = json.dumps(
        {
            "prompt": "what is 1+1? Just give me the number.",
            "provider": "openai",
            "name": "default",
            "model": GPT_5,
            "max_output_tokens": 100,
        }
    )

    # Run the `sol providers check` command
    result = _run_provider_check(ndjson_input, env, timeout=10)

    # Check that the command succeeded
    assert result.returncode == 0, f"Command failed with stderr: {result.stderr}"

    # Parse stdout events (should be JSONL format)
    events = _parse_events(result.stdout)

    # Verify we have events
    assert len(events) >= 2, (
        f"Expected at least start and finish events, got {len(events)}"
    )

    # Check start event
    start_event = events[0]
    assert start_event["event"] == "start"
    assert start_event["prompt"] == "what is 1+1? Just give me the number."
    assert start_event["model"] == GPT_5
    assert start_event["name"] == "default"
    assert isinstance(start_event["ts"], int)

    # Check finish event
    finish_event = events[-1]
    assert finish_event["event"] == "finish"
    assert isinstance(finish_event["ts"], int)
    assert "result" in finish_event

    # The result should contain "2"
    result_text = finish_event["result"].lower()
    assert "2" in result_text or "two" in result_text, (
        f"Expected '2' in response, got: {finish_event['result']}"
    )

    # Check for no errors
    error_events = [e for e in events if e.get("event") == "error"]
    assert len(error_events) == 0, f"Found error events: {error_events}"

    # Verify stderr has no errors (deprecation warnings from third-party libs are OK)
    if result.stderr:
        assert (
            "error" not in result.stderr.lower()
            or "deprecationwarning" in result.stderr.lower()
        ), f"Unexpected stderr content: {result.stderr}"


@pytest.mark.integration
@pytest.mark.requires_api
def test_openai_provider_with_reasoning():
    """Test OpenAI provider with reasoning model to verify thinking summaries.

    Uses GPT-5 which supports reasoning with summary="detailed" config.
    The key test is that:
    1. The request succeeds (reasoning config is valid)
    2. We may receive thinking events with summaries (model-dependent)
    3. If thinking events are present, they have the expected structure
    """
    require_cli_tool("OpenAI", "codex")
    env = _env_or_skip()

    # Use a prompt that encourages step-by-step reasoning
    ndjson_input = json.dumps(
        {
            "prompt": "If I have 3 apples and buy 5 more, then give away 2, how many do I have? Think through this step by step.",
            "provider": "openai",
            "name": "default",
            "model": GPT_5,
            "max_output_tokens": 500,
        }
    )

    # Run the `sol providers check` command (longer timeout for reasoning)
    result = _run_provider_check(ndjson_input, env, timeout=30)

    assert result.returncode == 0, f"Command failed with stderr: {result.stderr}"

    # Parse events
    events = _parse_events(result.stdout)

    # Verify no errors
    error_events = [e for e in events if e.get("event") == "error"]
    assert len(error_events) == 0, f"Found error events: {error_events}"

    # Check for thinking events - GPT-5 series should produce these
    # when reasoning config is properly set
    thinking_events = [e for e in events if e.get("event") == "thinking"]

    # If we have thinking events, verify their structure
    for thinking in thinking_events:
        assert "summary" in thinking, f"Thinking event missing 'summary': {thinking}"
        assert isinstance(thinking["summary"], str), (
            f"Thinking summary should be string: {thinking}"
        )
        assert len(thinking["summary"]) > 0, "Thinking summary should not be empty"
        assert "model" in thinking, f"Thinking event missing 'model': {thinking}"
        assert "ts" in thinking, f"Thinking event missing 'ts': {thinking}"
        assert isinstance(thinking["ts"], int), "Timestamp should be int"

    # Verify the answer is correct (6 apples: 3 + 5 - 2 = 6)
    assert events, "Expected at least a finish event, got no events"
    finish_event = events[-1]
    assert finish_event["event"] == "finish"
    result_text = finish_event["result"].lower()
    assert "6" in result_text or "six" in result_text, (
        f"Expected '6' in response, got: {finish_event['result']}"
    )

    # Log whether we got thinking events for debugging
    print(f"Received {len(thinking_events)} thinking events")


@pytest.mark.integration
@pytest.mark.requires_api
def test_openai_provider_with_extra_context():
    """Test OpenAI provider with extra_context to verify Responses API format.

    This exercises the session.add_items() path that was broken when content type
    was 'text' instead of 'input_text'. The key assertion is that we don't get
    the 400 error about invalid content type.
    """
    require_cli_tool("OpenAI", "codex")
    env = _env_or_skip()

    # Include extra_context like get_talent() does in production
    # This exercises the _convert_turns_to_items() code path
    ndjson_input = json.dumps(
        {
            "prompt": "What project was mentioned in the context above? Just the name.",
            "provider": "openai",
            "name": "default",
            "model": GPT_5,
            "max_output_tokens": 50,
            "extra_context": "## Project Context\nYou are working on Project Moonshot.",
        }
    )

    # Run the `sol providers check` command
    result = _run_provider_check(ndjson_input, env, timeout=15)

    # Parse stdout events (return code deliberately not asserted here:
    # the regression check below inspects error events directly)
    events = _parse_events(result.stdout)

    # The critical check: no 400 error about invalid content type
    # This was the original bug - using 'text' instead of 'input_text'
    error_events = [e for e in events if e.get("event") == "error"]
    for err in error_events:
        error_msg = err.get("error", "")
        assert "Invalid value: 'text'" not in error_msg, (
            f"Got content type format error - regression! Error: {error_msg}"
        )
        assert "input_text" not in error_msg or "Supported values" not in error_msg, (
            f"Got content type format error - regression! Error: {error_msg}"
        )

    # Verify we got past the format validation (start event was emitted)
    start_events = [e for e in events if e.get("event") == "start"]
    assert len(start_events) == 1, "Should have start event"

    # If we get a finish event, verify the response references the context
    finish_events = [e for e in events if e.get("event") == "finish"]
    if finish_events:
        result_text = finish_events[0].get("result", "").lower()
        assert "moonshot" in result_text, (
            f"Expected 'moonshot' in response, got: {finish_events[0].get('result')}"
        )


@pytest.mark.integration
@pytest.mark.requires_api
def test_openai_json_truncation_detection():
    """Test that OpenAI provider detects JSON response truncation via finish_reason.

    Uses a small max_output_tokens to force truncation, verifying that
    the provider returns finish_reason='max_tokens' which callers can use
    to detect incomplete responses.
    """
    # This test calls the provider module directly, so it only needs the
    # API key (no journal override, no CLI tool).
    fixtures_env, api_key, _ = get_fixtures_env()

    if not fixtures_env:
        pytest.skip("tests/fixtures/.env not found")

    if not api_key:
        pytest.skip("OPENAI_API_KEY not found in tests/fixtures/.env file")

    # Import provider directly for this test
    from think.providers import openai as openai_provider

    # Request JSON output with small token limit to force truncation
    # Use run_generate which returns GenerateResult, then check finish_reason
    result = openai_provider.run_generate(
        contents="Return a JSON array of the first 50 prime numbers.",
        model=GPT_5,
        json_output=True,
        max_output_tokens=50,  # Too small to complete the response
    )

    # Verify truncation was detected via finish_reason
    assert result["finish_reason"] == "max_tokens", (
        f"Expected max_tokens finish_reason, got: {result['finish_reason']}"
    )
    # Partial text should be present
    assert isinstance(result["text"], str)