# personal memory agent
1# SPDX-License-Identifier: AGPL-3.0-only
2# Copyright (c) 2026 sol pbc
3
4"""Integration test for OpenAI provider with real API calls."""
5
6import json
7import os
8import subprocess
9from pathlib import Path
10
11import pytest
12from dotenv import load_dotenv
13
14from tests.integration.conftest import require_cli_tool
15from think.models import GPT_5
16
17
def get_fixtures_env():
    """Load the tests/fixtures/.env file and return the environment.

    Returns:
        Tuple of (env file path, OPENAI_API_KEY value,
        _SOLSTONE_JOURNAL_OVERRIDE value); all three are ``None`` when
        tests/fixtures/.env does not exist.
    """
    env_file = Path(__file__).parent.parent / "fixtures" / ".env"
    if not env_file.exists():
        return None, None, None

    # Populate os.environ from the fixture file, overriding existing values.
    load_dotenv(env_file, override=True)

    return (
        env_file,
        os.getenv("OPENAI_API_KEY"),
        os.getenv("_SOLSTONE_JOURNAL_OVERRIDE"),
    )
31
32
@pytest.mark.integration
@pytest.mark.requires_api
def test_openai_provider_basic():
    """Test OpenAI provider with basic prompt via CLI.

    Sends a trivial arithmetic prompt through ``sol providers check`` and
    verifies the JSONL event stream: a start event, a finish event with the
    expected answer, integer timestamps, and no error events.
    """
    require_cli_tool("OpenAI", "codex")
    fixtures_env, api_key, journal_path = get_fixtures_env()

    if not fixtures_env:
        pytest.skip("tests/fixtures/.env not found")

    if not api_key:
        pytest.skip("OPENAI_API_KEY not found in tests/fixtures/.env file")

    if not journal_path:
        pytest.skip("_SOLSTONE_JOURNAL_OVERRIDE not found in tests/fixtures/.env file")

    # Prepare environment for the subprocess
    env = os.environ.copy()
    env["_SOLSTONE_JOURNAL_OVERRIDE"] = journal_path
    env["OPENAI_API_KEY"] = api_key

    # Create NDJSON input (no tool config)
    ndjson_input = json.dumps(
        {
            "prompt": "what is 1+1? Just give me the number.",
            "provider": "openai",
            "name": "default",
            "model": GPT_5,
            "max_output_tokens": 100,
        }
    )

    # Run the sol providers check command
    cmd = ["sol", "providers", "check"]
    result = subprocess.run(
        cmd,
        env=env,
        input=ndjson_input,
        capture_output=True,
        text=True,
        timeout=10,
    )

    # Check that the command succeeded
    assert result.returncode == 0, f"Command failed with stderr: {result.stderr}"

    # Parse stdout events (should be JSONL format)
    stdout_lines = result.stdout.strip().split("\n")
    events = []
    for line in stdout_lines:
        if line:
            try:
                events.append(json.loads(line))
            except json.JSONDecodeError as e:
                pytest.fail(f"Failed to parse JSON line: {line}\nError: {e}")

    # Verify we have events
    assert len(events) >= 2, (
        f"Expected at least start and finish events, got {len(events)}"
    )

    # Check start event
    start_event = events[0]
    assert start_event["event"] == "start"
    assert start_event["prompt"] == "what is 1+1? Just give me the number."
    assert start_event["model"] == GPT_5
    assert start_event["name"] == "default"
    assert isinstance(start_event["ts"], int)

    # Check finish event
    finish_event = events[-1]
    assert finish_event["event"] == "finish"
    assert isinstance(finish_event["ts"], int)
    assert "result" in finish_event

    # The result should contain "2"
    result_text = finish_event["result"].lower()
    assert "2" in result_text or "two" in result_text, (
        f"Expected '2' in response, got: {finish_event['result']}"
    )

    # Check for no errors
    error_events = [e for e in events if e.get("event") == "error"]
    assert len(error_events) == 0, f"Found error events: {error_events}"

    # Verify stderr has no real errors; deprecation warnings from third-party
    # libraries are tolerated. NOTE: the previous whole-string check passed
    # whenever ANY DeprecationWarning appeared anywhere in stderr, which could
    # mask genuine errors — check line by line instead.
    if result.stderr:
        offending = [
            line
            for line in result.stderr.lower().splitlines()
            if "error" in line and "deprecationwarning" not in line
        ]
        assert not offending, f"Unexpected stderr content: {result.stderr}"
124
125
@pytest.mark.integration
@pytest.mark.requires_api
def test_openai_provider_with_reasoning():
    """Test OpenAI provider with reasoning model to verify thinking summaries.

    Uses GPT-5 which supports reasoning with summary="detailed" config.
    The key test is that:
    1. The request succeeds (reasoning config is valid)
    2. We may receive thinking events with summaries (model-dependent)
    3. If thinking events are present, they have the expected structure
    """
    require_cli_tool("OpenAI", "codex")
    fixtures_env, api_key, journal_path = get_fixtures_env()

    if not fixtures_env:
        pytest.skip("tests/fixtures/.env not found")

    if not api_key:
        pytest.skip("OPENAI_API_KEY not found in tests/fixtures/.env file")

    if not journal_path:
        pytest.skip("_SOLSTONE_JOURNAL_OVERRIDE not found in tests/fixtures/.env file")

    # Prepare environment for the subprocess
    env = os.environ.copy()
    env["_SOLSTONE_JOURNAL_OVERRIDE"] = journal_path
    env["OPENAI_API_KEY"] = api_key

    # Use a prompt that encourages step-by-step reasoning
    ndjson_input = json.dumps(
        {
            "prompt": "If I have 3 apples and buy 5 more, then give away 2, how many do I have? Think through this step by step.",
            "provider": "openai",
            "name": "default",
            "model": GPT_5,
            "max_output_tokens": 500,
        }
    )

    # Run the sol providers check command
    cmd = ["sol", "providers", "check"]
    result = subprocess.run(
        cmd,
        env=env,
        input=ndjson_input,
        capture_output=True,
        text=True,
        timeout=30,  # Increased for reasoning
    )

    assert result.returncode == 0, f"Command failed with stderr: {result.stderr}"

    # Parse events
    stdout_lines = result.stdout.strip().split("\n")
    events = [json.loads(line) for line in stdout_lines if line]

    # Guard against an empty event stream so the events[-1] access below
    # fails with a clear message instead of a bare IndexError.
    assert events, f"No events emitted on stdout: {result.stdout!r}"

    # Verify no errors
    error_events = [e for e in events if e.get("event") == "error"]
    assert len(error_events) == 0, f"Found error events: {error_events}"

    # Check for thinking events - GPT-5 series should produce these
    # when reasoning config is properly set
    thinking_events = [e for e in events if e.get("event") == "thinking"]

    # If we have thinking events, verify their structure
    for thinking in thinking_events:
        assert "summary" in thinking, f"Thinking event missing 'summary': {thinking}"
        assert isinstance(thinking["summary"], str), (
            f"Thinking summary should be string: {thinking}"
        )
        assert len(thinking["summary"]) > 0, "Thinking summary should not be empty"
        assert "model" in thinking, f"Thinking event missing 'model': {thinking}"
        assert "ts" in thinking, f"Thinking event missing 'ts': {thinking}"
        assert isinstance(thinking["ts"], int), "Timestamp should be int"

    # Verify the answer is correct (6 apples: 3 + 5 - 2 = 6)
    finish_event = events[-1]
    assert finish_event["event"] == "finish"
    result_text = finish_event["result"].lower()
    assert "6" in result_text or "six" in result_text, (
        f"Expected '6' in response, got: {finish_event['result']}"
    )

    # Log whether we got thinking events for debugging
    print(f"Received {len(thinking_events)} thinking events")
211
212
@pytest.mark.integration
@pytest.mark.requires_api
def test_openai_provider_with_extra_context():
    """Exercise the Responses API request format via ``extra_context``.

    This drives the session.add_items() path that previously broke when the
    content type was 'text' instead of 'input_text'.  The essential check is
    the absence of the 400 error complaining about an invalid content type.
    """
    require_cli_tool("OpenAI", "codex")
    fixtures_env, api_key, journal_path = get_fixtures_env()

    if not fixtures_env:
        pytest.skip("tests/fixtures/.env not found")
    if not api_key:
        pytest.skip("OPENAI_API_KEY not found in tests/fixtures/.env file")
    if not journal_path:
        pytest.skip("_SOLSTONE_JOURNAL_OVERRIDE not found in tests/fixtures/.env file")

    # Subprocess environment: journal override plus the API key.
    child_env = os.environ.copy()
    child_env["_SOLSTONE_JOURNAL_OVERRIDE"] = journal_path
    child_env["OPENAI_API_KEY"] = api_key

    # extra_context mirrors what get_talent() supplies in production and
    # drives the _convert_turns_to_items() code path.
    request = {
        "prompt": "What project was mentioned in the context above? Just the name.",
        "provider": "openai",
        "name": "default",
        "model": GPT_5,
        "max_output_tokens": 50,
        "extra_context": "## Project Context\nYou are working on Project Moonshot.",
    }

    proc = subprocess.run(
        ["sol", "providers", "check"],
        env=child_env,
        input=json.dumps(request),
        capture_output=True,
        text=True,
        timeout=15,
    )

    # Decode the JSONL event stream from stdout.
    events = [json.loads(raw) for raw in proc.stdout.strip().split("\n") if raw]

    # The critical check: no 400 error about invalid content type -- the
    # original bug used 'text' where 'input_text' was required.
    for event in events:
        if event.get("event") != "error":
            continue
        message = event.get("error", "")
        assert "Invalid value: 'text'" not in message, (
            f"Got content type format error - regression! Error: {message}"
        )
        assert "input_text" not in message or "Supported values" not in message, (
            f"Got content type format error - regression! Error: {message}"
        )

    # A single start event proves we got past request-format validation.
    start_events = [e for e in events if e.get("event") == "start"]
    assert len(start_events) == 1, "Should have start event"

    # When a finish event arrives, the answer should reference the context.
    finishes = [e for e in events if e.get("event") == "finish"]
    if finishes:
        answer = finishes[0].get("result", "").lower()
        assert "moonshot" in answer, (
            f"Expected 'moonshot' in response, got: {finishes[0].get('result')}"
        )
289 )
290
291
@pytest.mark.integration
@pytest.mark.requires_api
def test_openai_json_truncation_detection():
    """Test that OpenAI provider detects JSON response truncation via finish_reason.

    Uses a small max_output_tokens to force truncation, verifying that
    the provider returns finish_reason='max_tokens' which callers can use
    to detect incomplete responses.
    """
    fixtures_env, api_key, _ = get_fixtures_env()

    if not fixtures_env:
        pytest.skip("tests/fixtures/.env not found")
    if not api_key:
        pytest.skip("OPENAI_API_KEY not found in tests/fixtures/.env file")

    # This test bypasses the CLI and imports the provider directly.
    from think.providers import openai as openai_provider

    # A tiny token budget guarantees the JSON array cannot be completed,
    # forcing the truncation path in run_generate().
    generate_result = openai_provider.run_generate(
        contents="Return a JSON array of the first 50 prime numbers.",
        model=GPT_5,
        json_output=True,
        max_output_tokens=50,  # Too small to complete the response
    )

    # Truncation must surface through finish_reason.
    assert generate_result["finish_reason"] == "max_tokens", (
        f"Expected max_tokens finish_reason, got: {generate_result['finish_reason']}"
    )
    # The partial text is still returned to the caller.
    assert isinstance(generate_result["text"], str)