tests/test_output_hooks.py at scratch/segment-sense-rd

solpbc.org / solstone
fork atom
personal memory agent
fork atom
solstone / tests / test_output_hooks.py
at scratch/segment-sense-rd 493 lines 16 kB view raw
wrap content
Jer Miller Canonicalize journal path to ./journal/ from project root 4w ago
449fb109
  1# SPDX-License-Identifier: AGPL-3.0-only
  2# Copyright (c) 2026 sol pbc
  3
  4"""Tests for the generator output hooks system.
  5
  6Tests cover:
  7- Hook loading and validation via load_post_hook / load_pre_hook
  8- Hook invocation via NDJSON protocol
  9- Hook error handling
 10"""
 11
 12import importlib
 13import io
 14import json
 15import os
 16import shutil
 17from pathlib import Path
 18
 19from think.muse import load_post_hook, load_pre_hook
 20from think.utils import day_path
 21
# Relative path to the test fixtures directory; it is resolved against the
# current working directory when used.
FIXTURES = Path("tests/fixtures")
 23
 24
 25def copy_day(tmp_path: Path) -> Path:
 26    os.environ["_SOLSTONE_JOURNAL_OVERRIDE"] = str(tmp_path)
 27    dest = day_path("20240101")
 28    src = FIXTURES / "journal" / "20240101"
 29    for item in src.iterdir():
 30        if item.is_dir():
 31            shutil.copytree(item, dest / item.name, dirs_exist_ok=True)
 32        else:
 33            shutil.copy2(item, dest / item.name)
 34    return dest
 35
 36
# Canned generation result returned by the stubbed generate_with_result in the
# invocation tests; its "text" value is what finish events are compared with.
MOCK_RESULT = {
    "text": "## Original Result\n\nThis is the original output content.",
    "usage": {"input_tokens": 100, "output_tokens": 50},
}
 41
 42
 43def run_generator_with_config(mod, config: dict, monkeypatch) -> list[dict]:
 44    """Run generator with NDJSON config and capture output events."""
 45    # Mock argv to prevent argparse from seeing pytest args
 46    monkeypatch.setattr("sys.argv", ["sol"])
 47
 48    stdin_data = json.dumps(config) + "\n"
 49    monkeypatch.setattr("sys.stdin", io.StringIO(stdin_data))
 50
 51    captured_output = io.StringIO()
 52    monkeypatch.setattr("sys.stdout", captured_output)
 53
 54    mod.main()
 55
 56    events = []
 57    captured_output.seek(0)
 58    for line in captured_output:
 59        line = line.strip()
 60        if line:
 61            events.append(json.loads(line))
 62
 63    return events
 64
 65
 66def test_load_post_hook_success(tmp_path):
 67    """Test loading a valid hook with post_process function."""
 68    hook_file = tmp_path / "test_hook.py"
 69    hook_file.write_text("""
 70def post_process(result, context):
 71    return result + "\\n\\n## Added by hook"
 72""")
 73
 74    # Config with explicit path
 75    config = {"hook": {"post": str(hook_file)}}
 76    hook_fn = load_post_hook(config)
 77    assert callable(hook_fn)
 78
 79    # Test the hook transforms content
 80    output = hook_fn("Original", {"day": "20240101"})
 81    assert output == "Original\n\n## Added by hook"
 82
 83
 84def test_load_post_hook_missing_post_process(tmp_path):
 85    """Test that hook without post_process function raises ValueError."""
 86    hook_file = tmp_path / "bad_hook.py"
 87    hook_file.write_text("""
 88def other_function():
 89    pass
 90""")
 91
 92    config = {"hook": {"post": str(hook_file)}}
 93    try:
 94        load_post_hook(config)
 95        assert False, "Should have raised ValueError"
 96    except ValueError as e:
 97        assert "must define a 'post_process' function" in str(e)
 98
 99
def test_load_post_hook_not_callable(tmp_path):
    """A ``post_process`` attribute that is not callable raises ValueError."""
    hook_path = tmp_path / "bad_hook.py"
    hook_path.write_text("""
post_process = "not a function"
""")
    hook_config = {"hook": {"post": str(hook_path)}}

    try:
        load_post_hook(hook_config)
    except ValueError as exc:
        assert "'post_process' must be callable" in str(exc)
    else:
        # Reaching this branch means the loader accepted a non-callable.
        assert False, "Should have raised ValueError"
113
114
def test_load_post_hook_no_hook_config():
    """Configs without a ``hook.post`` entry yield ``None``."""
    configs_without_post = (
        {},
        {"hook": {}},
        {"hook": {"pre": "something"}},  # only a pre-hook is configured
    )
    for cfg in configs_without_post:
        assert load_post_hook(cfg) is None
120
121
def test_load_post_hook_named_resolution():
    """A bare hook name resolves to muse/{name}.py and yields a callable."""
    # Relies on occurrence.py being present in muse/.
    loaded = load_post_hook({"hook": {"post": "occurrence"}})
    assert callable(loaded)
128
129
def test_load_post_hook_file_not_found(tmp_path):
    """Pointing ``hook.post`` at a missing file raises ImportError."""
    missing_path = tmp_path / "nonexistent.py"
    hook_config = {"hook": {"post": str(missing_path)}}

    try:
        load_post_hook(hook_config)
    except ImportError as exc:
        assert "not found" in str(exc)
    else:
        # Reaching this branch means the loader did not notice the missing file.
        assert False, "Should have raised ImportError"
138
139
def test_prompt_metadata_no_hook_path(tmp_path):
    """``_load_prompt_metadata`` must not add a ``hook_path`` key to its result."""
    muse_module = importlib.import_module("think.muse")

    prompt_path = tmp_path / "test_generator.md"
    prompt_path.write_text(
        '{\n  "title": "Test",\n  "hook": {"post": "entities"}\n}\n\nTest prompt'
    )

    # A .py file sitting next to the prompt, sharing its stem.
    sibling_hook = tmp_path / "test_generator.py"
    sibling_hook.write_text("def post_process(r, c): return r")

    metadata = muse_module._load_prompt_metadata(prompt_path)

    # Hooks are loaded via load_post_hook, so no hook_path is expected here.
    assert "hook_path" not in metadata
    assert metadata["path"] == str(prompt_path)
    assert metadata["title"] == "Test"
159
160
def test_output_hook_invocation(tmp_path, monkeypatch):
    """Test that agents.py invokes the post-hook and uses the transformed result.

    A generator prompt and a same-named hook module are written into
    *tmp_path* (patched in as ``think.muse.MUSE_DIR``), generation is stubbed
    to return ``MOCK_RESULT``, and the single ``finish`` event must carry both
    the original text and the text appended by the hook.
    """
    mod = importlib.import_module("think.agents")

    # Register the journal override through monkeypatch *before* copy_day():
    # copy_day() writes os.environ directly, so setting it afterwards would
    # make monkeypatch record tmp_path as the "previous" value and leave the
    # override pointing at this test's directory after teardown.
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))
    copy_day(tmp_path)

    # Use tmp_path as muse directory to avoid polluting real muse/
    import think.muse

    monkeypatch.setattr(think.muse, "MUSE_DIR", tmp_path)

    prompt_file = tmp_path / "hooked_test.md"
    prompt_file.write_text(
        '{\n  "type": "generate",\n  "title": "Hooked",\n  "schedule": "daily",\n  "priority": 10,\n  "output": "md",\n  "hook": {"post": "hooked_test"},\n  "instructions": {"system": "journal", "sources": {"transcripts": true, "percepts": true}}\n}\n\nTest prompt'
    )

    # The hook itself asserts on the context it receives before transforming.
    hook_file = tmp_path / "hooked_test.py"
    hook_file.write_text("""
def post_process(result, context):
    # Verify context has expected fields
    assert "day" in context
    assert "transcript" in context
    assert "name" in context
    return result + "\\n\\n## Hook was here"
""")

    # Mock the underlying generation function in think.models
    import think.models

    monkeypatch.setattr(
        think.models,
        "generate_with_result",
        lambda *a, **k: MOCK_RESULT,
    )
    monkeypatch.setenv("GOOGLE_API_KEY", "x")

    config = {
        "name": "hooked_test",
        "day": "20240101",
        "output": "md",
        "provider": "google",
        "model": "gemini-2.0-flash",
    }

    events = run_generator_with_config(mod, config, monkeypatch)

    # Find finish event
    finish_events = [e for e in events if e["event"] == "finish"]
    assert len(finish_events) == 1

    # Both the stubbed output and the hook's addition must be present.
    content = finish_events[0]["result"]
    assert "## Original Result" in content
    assert "## Hook was here" in content
214
215
def test_output_hook_returns_none(tmp_path, monkeypatch):
    """Test that a post-hook returning None leaves the original result in place.

    The hook written into *tmp_path* returns ``None``; the ``finish`` event
    must then carry exactly ``MOCK_RESULT["text"]``.
    """
    mod = importlib.import_module("think.agents")

    # Register the journal override through monkeypatch *before* copy_day():
    # copy_day() writes os.environ directly, so setting it afterwards would
    # make monkeypatch record tmp_path as the "previous" value and leave the
    # override pointing at this test's directory after teardown.
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))
    copy_day(tmp_path)

    # Use tmp_path as muse directory so the prompt and hook are found there.
    import think.muse

    monkeypatch.setattr(think.muse, "MUSE_DIR", tmp_path)

    prompt_file = tmp_path / "noop_test.md"
    prompt_file.write_text(
        '{\n  "type": "generate",\n  "title": "Noop",\n  "schedule": "daily",\n  "priority": 10,\n  "output": "md",\n  "hook": {"post": "noop_test"},\n  "instructions": {"system": "journal", "sources": {"transcripts": true, "percepts": true}}\n}\n\nTest prompt'
    )

    hook_file = tmp_path / "noop_test.py"
    hook_file.write_text("""
def post_process(result, context):
    return None  # Signal to use original
""")

    # Mock the underlying generation function in think.models
    import think.models

    monkeypatch.setattr(
        think.models,
        "generate_with_result",
        lambda *a, **k: MOCK_RESULT,
    )
    monkeypatch.setenv("GOOGLE_API_KEY", "x")

    config = {
        "name": "noop_test",
        "day": "20240101",
        "output": "md",
        "provider": "google",
        "model": "gemini-2.0-flash",
    }

    events = run_generator_with_config(mod, config, monkeypatch)

    # Exactly one finish event, carrying the untouched stubbed text.
    finish_events = [e for e in events if e["event"] == "finish"]
    assert len(finish_events) == 1
    assert finish_events[0]["result"] == MOCK_RESULT["text"]
260
261
def test_output_hook_error_fallback(tmp_path, monkeypatch):
    """Test that an exception inside the post-hook falls back to the original result.

    The hook written into *tmp_path* raises ``RuntimeError``; the run must
    still emit one ``finish`` event carrying exactly ``MOCK_RESULT["text"]``.
    """
    mod = importlib.import_module("think.agents")

    # Register the journal override through monkeypatch *before* copy_day():
    # copy_day() writes os.environ directly, so setting it afterwards would
    # make monkeypatch record tmp_path as the "previous" value and leave the
    # override pointing at this test's directory after teardown.
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))
    copy_day(tmp_path)

    # Use tmp_path as muse directory so the prompt and hook are found there.
    import think.muse

    monkeypatch.setattr(think.muse, "MUSE_DIR", tmp_path)

    prompt_file = tmp_path / "broken_test.md"
    prompt_file.write_text(
        '{\n  "type": "generate",\n  "title": "Broken",\n  "schedule": "daily",\n  "priority": 10,\n  "output": "md",\n  "hook": {"post": "broken_test"},\n  "instructions": {"system": "journal", "sources": {"transcripts": true, "percepts": true}}\n}\n\nTest prompt'
    )

    hook_file = tmp_path / "broken_test.py"
    hook_file.write_text("""
def post_process(result, context):
    raise RuntimeError("Hook exploded!")
""")

    # Mock the underlying generation function in think.models
    import think.models

    monkeypatch.setattr(
        think.models,
        "generate_with_result",
        lambda *a, **k: MOCK_RESULT,
    )
    monkeypatch.setenv("GOOGLE_API_KEY", "x")

    config = {
        "name": "broken_test",
        "day": "20240101",
        "output": "md",
        "provider": "google",
        "model": "gemini-2.0-flash",
    }

    # Should not raise, should fall back gracefully
    events = run_generator_with_config(mod, config, monkeypatch)

    # Exactly one finish event, carrying the untouched stubbed text.
    finish_events = [e for e in events if e["event"] == "finish"]
    assert len(finish_events) == 1
    assert finish_events[0]["result"] == MOCK_RESULT["text"]
307
308
309# =============================================================================
310# Pre-hook Tests
311# =============================================================================
312
313
def test_load_pre_hook_success(tmp_path):
    """A hook file defining ``pre_process`` loads and returns its modifications."""
    hook_source = """
def pre_process(context):
    return {"prompt": context["prompt"] + " [modified]"}
"""
    hook_path = tmp_path / "test_pre_hook.py"
    hook_path.write_text(hook_source)

    loaded = load_pre_hook({"hook": {"pre": str(hook_path)}})
    assert callable(loaded)

    # Calling the loaded hook returns the modified prompt from the file.
    modifications = loaded({"prompt": "original"})
    assert modifications == {"prompt": "original [modified]"}
329
330
def test_load_pre_hook_missing_pre_process(tmp_path):
    """A hook module lacking ``pre_process`` is rejected with ValueError."""
    hook_path = tmp_path / "bad_hook.py"
    hook_path.write_text("""
def other_function():
    pass
""")
    hook_config = {"hook": {"pre": str(hook_path)}}

    try:
        load_pre_hook(hook_config)
    except ValueError as exc:
        assert "must define a 'pre_process' function" in str(exc)
    else:
        # Reaching this branch means the loader accepted the bad module.
        assert False, "Should have raised ValueError"
345
346
def test_load_pre_hook_not_callable(tmp_path):
    """A ``pre_process`` attribute that is not callable raises ValueError."""
    hook_path = tmp_path / "bad_hook.py"
    hook_path.write_text("""
pre_process = "not a function"
""")
    hook_config = {"hook": {"pre": str(hook_path)}}

    try:
        load_pre_hook(hook_config)
    except ValueError as exc:
        assert "'pre_process' must be callable" in str(exc)
    else:
        # Reaching this branch means the loader accepted a non-callable.
        assert False, "Should have raised ValueError"
360
361
def test_load_pre_hook_no_hook_config():
    """Configs without a ``hook.pre`` entry yield ``None``."""
    configs_without_pre = (
        {},
        {"hook": {}},
        {"hook": {"post": "something"}},  # only a post-hook is configured
    )
    for cfg in configs_without_pre:
        assert load_pre_hook(cfg) is None
367
368
def test_load_pre_hook_file_not_found(tmp_path):
    """Pointing ``hook.pre`` at a missing file raises ImportError."""
    missing_path = tmp_path / "nonexistent.py"
    hook_config = {"hook": {"pre": str(missing_path)}}

    try:
        load_pre_hook(hook_config)
    except ImportError as exc:
        assert "not found" in str(exc)
    else:
        # Reaching this branch means the loader did not notice the missing file.
        assert False, "Should have raised ImportError"
377
378
def test_pre_hook_invocation(tmp_path, monkeypatch):
    """Test that agents.py invokes the pre-hook and uses the modified inputs.

    The hook written into *tmp_path* appends a marker to the prompt; the
    stubbed ``generate_with_result`` records what it is called with, and the
    marker must show up in the recorded ``contents``.
    """
    mod = importlib.import_module("think.agents")

    # Register the journal override through monkeypatch *before* copy_day():
    # copy_day() writes os.environ directly, so setting it afterwards would
    # make monkeypatch record tmp_path as the "previous" value and leave the
    # override pointing at this test's directory after teardown.
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))
    copy_day(tmp_path)

    # Use tmp_path as muse directory so the prompt and hook are found there.
    import think.muse

    monkeypatch.setattr(think.muse, "MUSE_DIR", tmp_path)

    prompt_file = tmp_path / "prehooked_test.md"
    prompt_file.write_text(
        '{\n  "type": "generate",\n  "title": "Prehooked",\n  "schedule": "daily",\n  "priority": 10,\n  "output": "md",\n  "hook": {"pre": "prehooked_test"},\n  "instructions": {"system": "journal", "sources": {"transcripts": true, "percepts": true}}\n}\n\nOriginal prompt'
    )

    # The hook itself asserts on the context it receives before modifying it.
    hook_file = tmp_path / "prehooked_test.py"
    hook_file.write_text("""
def pre_process(context):
    # Verify context has expected fields
    assert "transcript" in context
    assert "prompt" in context
    assert "system_instruction" in context
    # Modify the prompt
    return {"prompt": context["prompt"] + " [pre-processed]"}
""")

    # Track what generate_with_result receives
    received_kwargs = {}

    def mock_generate(*args, **kwargs):
        received_kwargs.update(kwargs)
        # contents may arrive positionally or by keyword; record it either way.
        received_kwargs["contents"] = args[0] if args else kwargs.get("contents")
        return MOCK_RESULT

    import think.models

    monkeypatch.setattr(think.models, "generate_with_result", mock_generate)
    monkeypatch.setenv("GOOGLE_API_KEY", "x")

    config = {
        "name": "prehooked_test",
        "day": "20240101",
        "output": "md",
        "provider": "google",
        "model": "gemini-2.0-flash",
    }

    events = run_generator_with_config(mod, config, monkeypatch)

    # Verify pre-hook modified the prompt - check in contents
    contents = received_kwargs.get("contents", [])
    # The prompt should contain [pre-processed]
    prompt_found = any("[pre-processed]" in str(c) for c in contents)
    assert prompt_found, f"Expected [pre-processed] in contents: {contents}"

    # Verify generator still completed successfully
    finish_events = [e for e in events if e["event"] == "finish"]
    assert len(finish_events) == 1
437
438
def test_both_pre_and_post_hooks(tmp_path, monkeypatch):
    """Test that pre and post hooks can be configured together.

    One hook module in *tmp_path* defines both ``pre_process`` and
    ``post_process``. The pre-hook marker must reach the stubbed
    ``generate_with_result`` and the post-hook marker must appear in the
    ``finish`` event's result.
    """
    mod = importlib.import_module("think.agents")

    # Register the journal override through monkeypatch *before* copy_day():
    # copy_day() writes os.environ directly, so setting it afterwards would
    # make monkeypatch record tmp_path as the "previous" value and leave the
    # override pointing at this test's directory after teardown.
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))
    copy_day(tmp_path)

    # Use tmp_path as muse directory so the prompt and hook are found there.
    import think.muse

    monkeypatch.setattr(think.muse, "MUSE_DIR", tmp_path)

    prompt_file = tmp_path / "both_hooks_test.md"
    prompt_file.write_text(
        '{\n  "type": "generate",\n  "title": "Both Hooks",\n  "schedule": "daily",\n  "priority": 10,\n  "output": "md",\n  "hook": {"pre": "both_hooks_test", "post": "both_hooks_test"},\n  "instructions": {"system": "journal", "sources": {"transcripts": true, "percepts": true}}\n}\n\nOriginal prompt'
    )

    hook_file = tmp_path / "both_hooks_test.py"
    hook_file.write_text("""
def pre_process(context):
    return {"prompt": context["prompt"] + " [pre]"}

def post_process(result, context):
    return result + "\\n\\n[post]"
""")

    # Track what generate_with_result receives
    received_kwargs = {}

    def mock_generate(*args, **kwargs):
        received_kwargs.update(kwargs)
        # contents may arrive positionally or by keyword; record it either way.
        received_kwargs["contents"] = args[0] if args else kwargs.get("contents")
        return MOCK_RESULT

    import think.models

    monkeypatch.setattr(think.models, "generate_with_result", mock_generate)
    monkeypatch.setenv("GOOGLE_API_KEY", "x")

    config = {
        "name": "both_hooks_test",
        "day": "20240101",
        "output": "md",
        "provider": "google",
        "model": "gemini-2.0-flash",
    }

    events = run_generator_with_config(mod, config, monkeypatch)

    # Verify pre-hook modified the prompt - check in contents
    contents = received_kwargs.get("contents", [])
    prompt_found = any("[pre]" in str(c) for c in contents)
    assert prompt_found, f"Expected [pre] in contents: {contents}"

    # Verify post-hook modified the result
    finish_events = [e for e in events if e["event"] == "finish"]
    assert len(finish_events) == 1
    assert "[post]" in finish_events[0]["result"]