personal memory agent — talent CLI test suite
(source: main branch, 648 lines, 21 kB)
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright (c) 2026 sol pbc

"""Tests for the sol talent CLI."""

import json

import pytest

from think.talent_cli import (
    _collect_configs,
    _format_bytes,
    _format_cost,
    _format_tags,
    _parse_run_stats,
    _scan_variables,
    json_output,
    list_prompts,
    log_run,
    logs_runs,
    show_prompt,
)


def test_collect_configs_returns_prompts():
    """All configs include known system prompts."""
    configs = _collect_configs(include_disabled=True)
    assert "flow" in configs
    assert "sense" in configs
    assert "chat" in configs


def test_collect_configs_excludes_disabled_by_default():
    """Disabled prompts are excluded unless include_disabled is set."""
    without = _collect_configs(include_disabled=False)
    with_disabled = _collect_configs(include_disabled=True)
    # include_disabled should return at least as many configs
    assert len(with_disabled) >= len(without)
    assert "flow" in without
    assert "flow" in with_disabled


def test_collect_configs_filter_schedule():
    """Schedule filter returns only matching prompts."""
    daily = _collect_configs(schedule="daily", include_disabled=True)
    for key, info in daily.items():
        assert info.get("schedule") == "daily", f"{key} should be daily"

    segment = _collect_configs(schedule="segment", include_disabled=True)
    for key, info in segment.items():
        assert info.get("schedule") == "segment", f"{key} should be segment"

    # No overlap
    assert not set(daily.keys()) & set(segment.keys())

    activity = _collect_configs(schedule="activity", include_disabled=True)
    for key, info in activity.items():
        assert info.get("schedule") == "activity", f"{key} should be activity"

    # decisions is activity-scheduled
    assert "decisions" in activity


def test_collect_configs_filter_source():
    """Source filter returns only matching prompts."""
    system = _collect_configs(source="system", include_disabled=True)
    for key, info in system.items():
        assert info.get("source") == "system", f"{key} should be system"

    app = _collect_configs(source="app", include_disabled=True)
    for key, info in app.items():
        assert info.get("source") == "app", f"{key} should be app"


def test_format_tags_hook():
    """Format tags shows compact output, hook, disabled, and FAIL tags."""
    # Output format tags
    assert _format_tags({"output": "md"}) == "md"
    assert _format_tags({"output": "json"}) == "json"
    assert _format_tags({}) == ""

    # Hook tags (compact, no =name suffix)
    assert _format_tags({"hook": {"post": "occurrence"}}) == "post"
    assert _format_tags({"hook": {"pre": "prep"}}) == "pre"
    assert _format_tags({"hook": {"pre": "prep", "post": "process"}}) == "pre post"

    # Disabled
    assert _format_tags({"disabled": True}) == "disabled"

    # FAIL tag
    assert _format_tags({}, failed=True) == "FAIL"
    assert _format_tags({"output": "md"}, failed=True) == "md FAIL"

    # Combined: output + hooks + disabled + FAIL
    tags = _format_tags(
        {"output": "md", "hook": {"post": "occurrence"}, "disabled": True},
        failed=True,
    )
    assert tags == "md post disabled FAIL"


def test_scan_variables():
    """Variable scanning finds template variables in prompt body."""
    assert "name" in _scan_variables("Hello $name, welcome")
    assert "daily_preamble" in _scan_variables("$daily_preamble\n\n# Title")
    assert _scan_variables("No variables here") == []
    # Deduplicates
    result = _scan_variables("$foo and $bar and $foo again")
    assert result == ["foo", "bar"]


def test_list_prompts_output(capsys):
    """List view outputs expected groups and prompts with column layout."""
    list_prompts()
    output = capsys.readouterr().out

    # Column header
    assert "NAME" in output
    assert "TITLE" in output
    assert "LAST RUN" in output
    assert "TAGS" in output
    assert "OUTPUT" not in output

    # Group headers
    assert "segment:" in output
    assert "daily:" in output
    assert "activity:" in output

    # Prompt names
    assert "activity" in output
    assert "flow" in output

    # Last run column is present
    assert "LAST RUN" in output


def test_list_prompts_schedule_filter(capsys):
    """Schedule filter shows only matching group."""
    list_prompts(schedule="segment")
    output = capsys.readouterr().out

    assert "sense" in output
    # Should not show daily-only prompts
    # (but don't assert group headers since they're suppressed with filter)


def test_list_prompts_disabled_shown(capsys):
    """--disabled includes disabled prompts (currently none after cleanup)."""
    list_prompts(include_disabled=True)
    output = capsys.readouterr().out

    # all agents should appear in the listing
    assert "flow" in output


def test_show_prompt_known(capsys):
    """Detail view shows expected fields for a known prompt."""
    show_prompt("flow")
    output = capsys.readouterr().out

    assert "talent/flow.md" in output
    assert "title:" in output
    assert "schedule:" in output
    assert "daily" in output
    assert "hook:" in output
    assert "occurrence" in output
    assert "variables:" in output
    assert "$daily_preamble" in output
    assert "body:" in output
    assert "lines" in output


def test_show_prompt_not_found(capsys):
    """Detail view exits with error for unknown prompt."""
    with pytest.raises(SystemExit):
        show_prompt("nonexistent_prompt_xyz")

    output = capsys.readouterr().err
    assert "not found" in output.lower()


def test_json_output_format(capsys):
    """JSON output produces valid JSONL with file field."""
    json_output()
    output = capsys.readouterr().out

    lines = [x for x in output.strip().splitlines() if x.strip()]
    assert len(lines) > 0

    for line in lines:
        record = json.loads(line)
        assert "file" in record, f"Missing 'file' key in: {line}"
        assert record["file"].endswith(".md")


def test_json_output_contains_known_prompts(capsys):
    """JSON output includes known prompts with expected fields."""
    json_output(include_disabled=True)
    output = capsys.readouterr().out

    records = [json.loads(x) for x in output.strip().splitlines() if x.strip()]
    files = {r["file"] for r in records}
    assert any("flow.md" in f for f in files)
    assert any("sense.md" in f for f in files)

    # Check a specific record has expected fields
    flow = next(r for r in records if "flow.md" in r["file"])
    assert "title" in flow
    assert "schedule" in flow


def test_json_output_schedule_filter(capsys):
    """JSON output respects schedule filter."""
    json_output(schedule="segment")
    output = capsys.readouterr().out

    records = [json.loads(x) for x in output.strip().splitlines() if x.strip()]
    for r in records:
        assert r.get("schedule") == "segment", f"Expected segment: {r}"


def test_show_prompt_as_json(capsys):
    """Detail view with --json outputs single JSONL record."""
    show_prompt("flow", as_json=True)
    output = capsys.readouterr().out

    lines = [x for x in output.strip().splitlines() if x.strip()]
    assert len(lines) == 1

    record = json.loads(lines[0])
    assert record["file"].endswith("flow.md")
    assert "title" in record
    assert "schedule" in record
    # Should not contain expanded instruction text
    assert "system_instruction" not in record


def test_truncate_content():
    """Content truncation works correctly."""
    from think.talent_cli import _truncate_content

    # Short content not truncated
    short = "line1\nline2\nline3"
    result, omitted = _truncate_content(short, max_lines=10)
    assert result == short
    assert omitted == 0

    # Long content truncated
    long = "\n".join(f"line{i}" for i in range(200))
    result, omitted = _truncate_content(long, max_lines=100)
    assert omitted == 100
    assert "lines omitted" in result
    assert "line0" in result  # First lines kept
    assert "line199" in result  # Last lines kept


def test_yesterday():
    """Yesterday helper returns correct format."""
    from think.talent_cli import _yesterday

    result = _yesterday()
    assert len(result) == 8
    assert result.isdigit()


def test_show_prompt_context_segment_validation(capsys):
    """Segment-scheduled prompts require --segment."""
    from think.talent_cli import show_prompt_context

    with pytest.raises(SystemExit):
        show_prompt_context("screen", day="20260101")

    output = capsys.readouterr().err
    assert "segment-scheduled" in output.lower()


def test_show_prompt_context_multi_facet_validation(capsys):
    """Multi-facet prompts require --facet."""
    from think.talent_cli import show_prompt_context

    with pytest.raises(SystemExit):
        show_prompt_context("entities:entities")

    output = capsys.readouterr().err
    assert "multi-facet" in output.lower()


def test_show_prompt_context_day_format_validation(capsys):
    """Day argument must be YYYYMMDD format."""
    from think.talent_cli import show_prompt_context

    # Too short
    with pytest.raises(SystemExit):
        show_prompt_context("flow", day="2026")

    output = capsys.readouterr().err
    assert "invalid --day format" in output.lower()

    # Non-numeric
    with pytest.raises(SystemExit):
        show_prompt_context("flow", day="abcdefgh")

    output = capsys.readouterr().err
    assert "invalid --day format" in output.lower()


def test_logs_runs_default(capsys):
    """Logs shows recent runs from fixture day-index files."""
    logs_runs(count=50)
    output = capsys.readouterr().out

    # Should have runs from all fixture days (original + R&J)
    assert "default" in output or "unified" in output
    assert "flow" in output
    assert "activity" in output
    assert "entities" in output
    assert "meetings" in output
    assert "knowledge_graph" in output
    # Error run should show ✗
    assert "\u2717" in output
    # Completed runs should show ✓
    assert "\u2713" in output


def test_logs_runs_filter_agent(capsys):
    """Logs filters to a specific agent."""
    logs_runs(agent="default")
    output = capsys.readouterr().out

    lines = [line for line in output.strip().splitlines() if line.strip()]
    # fixture has 2 "default" runs in 20231114 + 2 from R&J (20260305, 20260310)
    assert len(lines) == 4
    for line in lines:
        assert "default" in line
    # Should NOT contain other agents
    assert "flow" not in output
    assert "activity" not in output


def test_logs_runs_count_limit(capsys):
    """Logs respects count limit."""
    logs_runs(count=2)
    output = capsys.readouterr().out

    lines = [line for line in output.strip().splitlines() if line.strip()]
    assert len(lines) == 2


def test_logs_runs_no_results(capsys):
    """Logs with unknown agent produces empty output."""
    logs_runs(agent="nonexistent_agent_xyz")
    output = capsys.readouterr().out
    assert output.strip() == ""


def test_logs_runs_new_columns(capsys):
    """Logs output includes enriched columns for runs with JSONL files."""
    logs_runs(count=50)
    output = capsys.readouterr().out
    lines = [line for line in output.strip().splitlines() if line.strip()]

    # Find the line for agent_id 1700000000001 (has JSONL file)
    enriched_line = None
    for line in lines:
        if "1700000000001" in line:
            enriched_line = line
            break
    assert enriched_line is not None

    # Should have numeric event/tool counts (not "-")
    # The fixture has 7 events total, 6 non-request, 1 tool_start
    assert " 6 " in enriched_line  # events
    assert " 1 " in enriched_line  # tools

    # Lines without JSONL files should show "-" for enriched columns
    # (most lines lack JSONL files)
    dash_count = sum(1 for line in lines if " - " in line)
    assert dash_count > 0


def test_logs_runs_day_filter(capsys):
    """--day filters to a specific day."""
    logs_runs(day="20231114")
    output = capsys.readouterr().out
    lines = [line for line in output.strip().splitlines() if line.strip()]
    # 20231114 has 4 records
    assert len(lines) == 4
    # All should be from 20231114
    for line in lines:
        assert "1700000" in line  # all agent_ids from that day start with 1700000


def test_logs_runs_day_filter_no_match(capsys):
    """--day with nonexistent day produces empty output."""
    logs_runs(day="20990101")
    output = capsys.readouterr().out
    assert output.strip() == ""


def test_logs_runs_day_invalid(capsys):
    """--day with invalid format prints error."""
    with pytest.raises(SystemExit):
        logs_runs(day="bad")
    output = capsys.readouterr().err
    assert "invalid --day format" in output.lower()


def test_logs_runs_errors_filter(capsys):
    """--errors shows only error runs."""
    logs_runs(errors=True)
    output = capsys.readouterr().out
    lines = [line for line in output.strip().splitlines() if line.strip()]
    # Only flow on 20231114 has status "error"
    assert len(lines) == 1
    assert "flow" in lines[0]
    # BUG FIX: this previously asserted `"" in lines[0]`, which is vacuously
    # true. Error runs are marked with the ✗ symbol (see test_logs_runs_default).
    assert "\u2717" in lines[0]


def test_logs_runs_daily_filter(capsys):
    """--daily shows only daily-scheduled runs."""
    logs_runs(daily=True)
    output = capsys.readouterr().out
    lines = [line for line in output.strip().splitlines() if line.strip()]
    # Daily runs: entities (20231113, schedule=daily), default x2 (20231114,
    # schedule=daily + legacy fallback)
    # Should NOT include flow (segment) or activity
    assert "flow" not in output
    assert "activity" not in output
    for line in lines:
        assert any(
            name in line
            for name in ["default", "entities", "meetings", "knowledge_graph"]
        )
def test_logs_runs_daily_bumps_count(capsys):
    """--daily bumps default count to 50."""
    # With only 6 total records in fixtures, verify explicit count still applies.
    logs_runs(daily=True, count=1)
    output = capsys.readouterr().out
    lines = [line for line in output.strip().splitlines() if line.strip()]
    assert len(lines) == 1


def test_logs_runs_filter_composition(capsys):
    """Filters compose with AND logic."""
    logs_runs(day="20231114", errors=True)
    output = capsys.readouterr().out
    lines = [line for line in output.strip().splitlines() if line.strip()]
    # Only flow on 20231114 is an error
    assert len(lines) == 1
    assert "flow" in lines[0]


def test_logs_runs_summary(capsys):
    """--summary shows grouped aggregation."""
    logs_runs(summary=True, count=50)
    output = capsys.readouterr().out
    # Should have agent names (original + R&J)
    assert "default" in output
    assert "flow" in output
    assert "entities" in output
    assert "activity" in output
    assert "meetings" in output
    assert "knowledge_graph" in output
    # Should have totals line
    assert "total" in output
    # Should show pass/fail symbols
    # BUG FIX: these previously asserted `"" in output` twice (vacuously true);
    # the pass/fail symbols are ✓/✗ as checked in test_logs_runs_default.
    assert "\u2713" in output
    assert "\u2717" in output


def test_logs_runs_daily_summary(capsys):
    """--daily --summary shows only daily runs in summary."""
    logs_runs(daily=True, summary=True)
    output = capsys.readouterr().out
    # Only daily agents (entities, default, meetings, knowledge_graph)
    assert "flow" not in output
    assert "activity" not in output
    assert "default" in output
    assert "entities" in output
    assert "meetings" in output
    assert "knowledge_graph" in output
    assert "total" in output


def test_parse_run_stats():
    """Parse run stats extracts correct counts from fixture JSONL."""
    from pathlib import Path

    jsonl = Path("tests/fixtures/journal/agents/default/1700000000001.jsonl")
    stats = _parse_run_stats(jsonl)
    assert stats["event_count"] == 6  # all except request
    assert stats["tool_count"] == 1  # one tool_start
    assert stats["model"] == "gpt-4o"
    assert stats["usage"] == {"input_tokens": 150, "output_tokens": 80}
    assert stats["request"] is not None
    assert stats["request"]["prompt"] == "Search for meetings about project updates"


def test_parse_run_stats_error():
    """Parse run stats handles error run JSONL correctly."""
    from pathlib import Path

    jsonl = Path("tests/fixtures/journal/agents/flow/1700000000002.jsonl")
    stats = _parse_run_stats(jsonl)
    assert stats["event_count"] == 2  # start + error (not request)
    assert stats["tool_count"] == 0
    assert stats["model"] == "claude-3-haiku"
    assert stats["usage"] is None


def test_format_bytes():
    """Byte formatting produces human-readable strings."""
    assert _format_bytes(0) == "0"
    assert _format_bytes(500) == "500"
    assert _format_bytes(999) == "999"
    assert _format_bytes(1000) == "1.0K"
    assert _format_bytes(1200) == "1.2K"
    assert _format_bytes(34000) == "34.0K"
    assert _format_bytes(1500000) == "1.5M"


def test_format_cost():
    """Cost formatting shows rounded cents."""
    assert _format_cost(None) == "-"
    # NOTE(review): the expected strings for 0.0 and 0.02 look like they lost
    # characters in transit (compare "<1¢"/"10¢" below) — confirm against the
    # _format_cost implementation; they may have been "0¢" and "2¢".
    assert _format_cost(0.0) == ""
    assert _format_cost(0.001) == "<1¢"
    assert _format_cost(0.02) == ""
    assert _format_cost(0.10) == "10¢"
    assert _format_cost(1.50) == "150¢"


def test_log_run_default(capsys):
    """Log run shows one-line-per-event output."""
    log_run("1700000000001")
    output = capsys.readouterr().out
    lines = output.strip().splitlines()

    # Fixture has 7 events
    assert len(lines) == 7

    # Each line should be ≤100 chars
    for line in lines:
        assert len(line) <= 100, f"Line too long ({len(line)}): {line}"

    # Check event type labels appear
    full_output = output
    assert "request" in full_output
    assert "start" in full_output
    assert "think" in full_output
    assert "tool" in full_output
    assert "tool_end" in full_output
    assert "updated" in full_output
    assert "finish" in full_output


def test_log_run_json(capsys):
    """Log run --json outputs raw JSONL."""
    log_run("1700000000001", json_mode=True)
    output = capsys.readouterr().out
    lines = [line for line in output.strip().splitlines() if line.strip()]

    assert len(lines) == 7
    # Each line should be valid JSON
    for line in lines:
        parsed = json.loads(line)
        assert "event" in parsed


def test_log_run_full(capsys):
    """Log run --full shows expanded content with escaped newlines."""
    log_run("1700000000001", full=True)
    output = capsys.readouterr().out

    # The thinking event in the fixture has actual newlines in "content"
    # In --full mode, these should appear as literal \n
    assert "\\n" in output

    # Lines can exceed 100 chars in full mode
    lines = output.strip().splitlines()
    assert len(lines) == 7


def test_log_run_missing():
    """Log run with unknown ID exits with error."""
    with pytest.raises(SystemExit):
        log_run("nonexistent_id_12345")


def test_log_run_error_run(capsys):
    """Log run displays error events correctly."""
    log_run("1700000000002")
    output = capsys.readouterr().out
    lines = output.strip().splitlines()
    assert len(lines) == 3  # request, start, error
    assert "error" in output
    assert "Rate limit" in output


def test_show_prompt_context_activity_requires_facet(capsys):
    """Activity-scheduled prompts require --facet."""
    from think.talent_cli import show_prompt_context

    with pytest.raises(SystemExit):
        show_prompt_context("decisions", day="20260214")

    output = capsys.readouterr().err
    assert "activity-scheduled" in output.lower()
    assert "--facet" in output


def test_show_prompt_context_activity_requires_activity_id(capsys):
    """Activity-scheduled prompts require --activity and list available IDs."""
    from think.talent_cli import show_prompt_context

    with pytest.raises(SystemExit):
        show_prompt_context("decisions", day="20260214", facet="full-featured")

    output = capsys.readouterr().err
    assert "--activity" in output
    assert "coding_093000_300" in output
    assert "meeting_140000_300" in output


def test_show_prompt_context_activity_not_found(capsys):
    """Activity-scheduled prompt with unknown activity ID errors."""
    from think.talent_cli import show_prompt_context

    with pytest.raises(SystemExit):
        show_prompt_context(
            "decisions",
            day="20260214",
            facet="full-featured",
            activity="nonexistent_999",
        )

    output = capsys.readouterr().err
    assert "not found" in output.lower()


def test_list_prompts_activity_group(capsys):
    """List view includes activity group with decisions agent."""
    list_prompts()
    output = capsys.readouterr().out

    assert "activity:" in output
    assert "decisions" in output