personal memory agent — talent CLI test suite
(source: main branch, 648 lines, 21 kB)
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright (c) 2026 sol pbc

"""Tests for the sol talent CLI."""

import json

import pytest

from think.talent_cli import (
    _collect_configs,
    _format_bytes,
    _format_cost,
    _format_tags,
    _parse_run_stats,
    _scan_variables,
    json_output,
    list_prompts,
    log_run,
    logs_runs,
    show_prompt,
)


def test_collect_configs_returns_prompts():
    """All configs include known system prompts."""
    configs = _collect_configs(include_disabled=True)
    assert "flow" in configs
    assert "sense" in configs
    assert "chat" in configs


def test_collect_configs_excludes_disabled_by_default():
    """Disabled prompts are excluded unless include_disabled is set."""
    without = _collect_configs(include_disabled=False)
    with_disabled = _collect_configs(include_disabled=True)
    # include_disabled should return at least as many configs
    assert len(with_disabled) >= len(without)
    assert "flow" in without
    assert "flow" in with_disabled


def test_collect_configs_filter_schedule():
    """Schedule filter returns only matching prompts."""
    daily = _collect_configs(schedule="daily", include_disabled=True)
    for key, info in daily.items():
        assert info.get("schedule") == "daily", f"{key} should be daily"

    segment = _collect_configs(schedule="segment", include_disabled=True)
    for key, info in segment.items():
        assert info.get("schedule") == "segment", f"{key} should be segment"

    # No overlap
    assert not set(daily.keys()) & set(segment.keys())

    activity = _collect_configs(schedule="activity", include_disabled=True)
    for key, info in activity.items():
        assert info.get("schedule") == "activity", f"{key} should be activity"

    # decisions is activity-scheduled
    assert "decisions" in activity


def test_collect_configs_filter_source():
    """Source filter returns only matching prompts."""
    system = _collect_configs(source="system", include_disabled=True)
    for key, info in system.items():
        assert info.get("source") == "system", f"{key} should be system"

    app = _collect_configs(source="app", include_disabled=True)
    for key, info in app.items():
        assert info.get("source") == "app", f"{key} should be app"


def test_format_tags_hook():
    """Format tags shows compact output, hook, disabled, and FAIL tags."""
    # Output format tags
    assert _format_tags({"output": "md"}) == "md"
    assert _format_tags({"output": "json"}) == "json"
    assert _format_tags({}) == ""

    # Hook tags (compact, no =name suffix)
    assert _format_tags({"hook": {"post": "occurrence"}}) == "post"
    assert _format_tags({"hook": {"pre": "prep"}}) == "pre"
    assert _format_tags({"hook": {"pre": "prep", "post": "process"}}) == "pre post"

    # Disabled
    assert _format_tags({"disabled": True}) == "disabled"

    # FAIL tag
    assert _format_tags({}, failed=True) == "FAIL"
    assert _format_tags({"output": "md"}, failed=True) == "md FAIL"

    # Combined: output + hooks + disabled + FAIL
    tags = _format_tags(
        {"output": "md", "hook": {"post": "occurrence"}, "disabled": True},
        failed=True,
    )
    assert tags == "md post disabled FAIL"


def test_scan_variables():
    """Variable scanning finds template variables in prompt body."""
    assert "name" in _scan_variables("Hello $name, welcome")
    assert "daily_preamble" in _scan_variables("$daily_preamble\n\n# Title")
    assert _scan_variables("No variables here") == []
    # Deduplicates
    result = _scan_variables("$foo and $bar and $foo again")
    assert result == ["foo", "bar"]


def test_list_prompts_output(capsys):
    """List view outputs expected groups and prompts with column layout."""
    list_prompts()
    output = capsys.readouterr().out

    # Column header
    assert "NAME" in output
    assert "TITLE" in output
    assert "LAST RUN" in output
    assert "TAGS" in output
    assert "OUTPUT" not in output

    # Group headers
    assert "segment:" in output
    assert "daily:" in output
    assert "activity:" in output

    # Prompt names
    assert "activity" in output
    assert "flow" in output

    # Last run column is present
    assert "LAST RUN" in output


def test_list_prompts_schedule_filter(capsys):
    """Schedule filter shows only matching group."""
    list_prompts(schedule="segment")
    output = capsys.readouterr().out

    assert "sense" in output
    # Should not show daily-only prompts
    # (but don't assert group headers since they're suppressed with filter)


def test_list_prompts_disabled_shown(capsys):
    """--disabled includes disabled prompts (currently none after cleanup)."""
    list_prompts(include_disabled=True)
    output = capsys.readouterr().out

    # all agents should appear in the listing
    assert "flow" in output


def test_show_prompt_known(capsys):
    """Detail view shows expected fields for a known prompt."""
    show_prompt("flow")
    output = capsys.readouterr().out

    assert "talent/flow.md" in output
    assert "title:" in output
    assert "schedule:" in output
    assert "daily" in output
    assert "hook:" in output
    assert "occurrence" in output
    assert "variables:" in output
    assert "$daily_preamble" in output
    assert "body:" in output
    assert "lines" in output


def test_show_prompt_not_found(capsys):
    """Detail view exits with error for unknown prompt."""
    with pytest.raises(SystemExit):
        show_prompt("nonexistent_prompt_xyz")

    output = capsys.readouterr().err
    assert "not found" in output.lower()


def test_json_output_format(capsys):
    """JSON output produces valid JSONL with file field."""
    json_output()
    output = capsys.readouterr().out

    lines = [x for x in output.strip().splitlines() if x.strip()]
    assert len(lines) > 0

    for line in lines:
        record = json.loads(line)
        assert "file" in record, f"Missing 'file' key in: {line}"
        assert record["file"].endswith(".md")


def test_json_output_contains_known_prompts(capsys):
    """JSON output includes known prompts with expected fields."""
    json_output(include_disabled=True)
    output = capsys.readouterr().out

    records = [json.loads(x) for x in output.strip().splitlines() if x.strip()]
    files = {r["file"] for r in records}
    assert any("flow.md" in f for f in files)
    assert any("sense.md" in f for f in files)

    # Check a specific record has expected fields
    flow = next(r for r in records if "flow.md" in r["file"])
    assert "title" in flow
    assert "schedule" in flow


def test_json_output_schedule_filter(capsys):
    """JSON output respects schedule filter."""
    json_output(schedule="segment")
    output = capsys.readouterr().out

    records = [json.loads(x) for x in output.strip().splitlines() if x.strip()]
    for r in records:
        assert r.get("schedule") == "segment", f"Expected segment: {r}"


def test_show_prompt_as_json(capsys):
    """Detail view with --json outputs single JSONL record."""
    show_prompt("flow", as_json=True)
    output = capsys.readouterr().out

    lines = [x for x in output.strip().splitlines() if x.strip()]
    assert len(lines) == 1

    record = json.loads(lines[0])
    assert record["file"].endswith("flow.md")
    assert "title" in record
    assert "schedule" in record
    # Should not contain expanded instruction text
    assert "system_instruction" not in record


def test_truncate_content():
    """Content truncation works correctly."""
    from think.talent_cli import _truncate_content

    # Short content not truncated
    short = "line1\nline2\nline3"
    result, omitted = _truncate_content(short, max_lines=10)
    assert result == short
    assert omitted == 0

    # Long content truncated
    long = "\n".join(f"line{i}" for i in range(200))
    result, omitted = _truncate_content(long, max_lines=100)
    assert omitted == 100
    assert "lines omitted" in result
    assert "line0" in result  # First lines kept
    assert "line199" in result  # Last lines kept


def test_yesterday():
    """Yesterday helper returns correct format."""
    from think.talent_cli import _yesterday

    result = _yesterday()
    assert len(result) == 8
    assert result.isdigit()


def test_show_prompt_context_segment_validation(capsys):
    """Segment-scheduled prompts require --segment."""
    from think.talent_cli import show_prompt_context

    with pytest.raises(SystemExit):
        show_prompt_context("screen", day="20260101")

    output = capsys.readouterr().err
    assert "segment-scheduled" in output.lower()


def test_show_prompt_context_multi_facet_validation(capsys):
    """Multi-facet prompts require --facet."""
    from think.talent_cli import show_prompt_context

    with pytest.raises(SystemExit):
        show_prompt_context("entities:entities")

    output = capsys.readouterr().err
    assert "multi-facet" in output.lower()


def test_show_prompt_context_day_format_validation(capsys):
    """Day argument must be YYYYMMDD format."""
    from think.talent_cli import show_prompt_context

    # Too short
    with pytest.raises(SystemExit):
        show_prompt_context("flow", day="2026")

    output = capsys.readouterr().err
    assert "invalid --day format" in output.lower()

    # Non-numeric
    with pytest.raises(SystemExit):
        show_prompt_context("flow", day="abcdefgh")

    output = capsys.readouterr().err
    assert "invalid --day format" in output.lower()


def test_logs_runs_default(capsys):
    """Logs shows recent runs from fixture day-index files."""
    logs_runs(count=50)
    output = capsys.readouterr().out

    # Should have runs from all fixture days (original + R&J)
    assert "default" in output or "unified" in output
    assert "flow" in output
    assert "activity" in output
    assert "entities" in output
    assert "meetings" in output
    assert "knowledge_graph" in output
    # Error run should show ✗
    assert "\u2717" in output
    # Completed runs should show ✓
    assert "\u2713" in output


def test_logs_runs_filter_agent(capsys):
    """Logs filters to a specific agent."""
    logs_runs(agent="default")
    output = capsys.readouterr().out

    lines = [line for line in output.strip().splitlines() if line.strip()]
    # fixture has 2 "default" runs in 20231114 + 2 from R&J (20260305, 20260310)
    assert len(lines) == 4
    for line in lines:
        assert "default" in line
    # Should NOT contain other agents
    assert "flow" not in output
    assert "activity" not in output


def test_logs_runs_count_limit(capsys):
    """Logs respects count limit."""
    logs_runs(count=2)
    output = capsys.readouterr().out

    lines = [line for line in output.strip().splitlines() if line.strip()]
    assert len(lines) == 2


def test_logs_runs_no_results(capsys):
    """Logs with unknown agent produces empty output."""
    logs_runs(agent="nonexistent_agent_xyz")
    output = capsys.readouterr().out
    assert output.strip() == ""


def test_logs_runs_new_columns(capsys):
    """Logs output includes enriched columns for runs with JSONL files."""
    logs_runs(count=50)
    output = capsys.readouterr().out
    lines = [line for line in output.strip().splitlines() if line.strip()]

    # Find the line for agent_id 1700000000001 (has JSONL file)
    enriched_line = None
    for line in lines:
        if "1700000000001" in line:
            enriched_line = line
            break
    assert enriched_line is not None

    # Should have numeric event/tool counts (not "-")
    # The fixture has 7 events total, 6 non-request, 1 tool_start
    assert " 6 " in enriched_line  # events
    assert " 1 " in enriched_line  # tools

    # Lines without JSONL files should show "-" for enriched columns
    # (most lines lack JSONL files)
    dash_count = sum(1 for line in lines if " - " in line)
    assert dash_count > 0


def test_logs_runs_day_filter(capsys):
    """--day filters to a specific day."""
    logs_runs(day="20231114")
    output = capsys.readouterr().out
    lines = [line for line in output.strip().splitlines() if line.strip()]
    # 20231114 has 4 records
    assert len(lines) == 4
    # All should be from 20231114
    for line in lines:
        assert "1700000" in line  # all agent_ids from that day start with 1700000


def test_logs_runs_day_filter_no_match(capsys):
    """--day with nonexistent day produces empty output."""
    logs_runs(day="20990101")
    output = capsys.readouterr().out
    assert output.strip() == ""


def test_logs_runs_day_invalid(capsys):
    """--day with invalid format prints error."""
    with pytest.raises(SystemExit):
        logs_runs(day="bad")
    output = capsys.readouterr().err
    assert "invalid --day format" in output.lower()


def test_logs_runs_errors_filter(capsys):
    """--errors shows only error runs."""
    logs_runs(errors=True)
    output = capsys.readouterr().out
    lines = [line for line in output.strip().splitlines() if line.strip()]
    # Only flow on 20231114 has status "error"
    assert len(lines) == 1
    assert "flow" in lines[0]
    # BUG FIX: this previously asserted `"" in lines[0]`, which is vacuously
    # true. Error runs are marked with the ✗ symbol (see test_logs_runs_default).
    assert "\u2717" in lines[0]


def test_logs_runs_daily_filter(capsys):
    """--daily shows only daily-scheduled runs."""
    logs_runs(daily=True)
    output = capsys.readouterr().out
    lines = [line for line in output.strip().splitlines() if line.strip()]
    # Daily runs: entities (20231113, schedule=daily), default x2 (20231114,
    # schedule=daily + legacy fallback)
    # Should NOT include flow (segment) or activity
    assert "flow" not in output
    assert "activity" not in output
    for line in lines:
        assert any(
            name in line
            for name in ["default", "entities", "meetings", "knowledge_graph"]
        )
def test_logs_runs_daily_bumps_count(capsys):
    """--daily bumps default count to 50."""
    # With only 6 total records in fixtures, verify explicit count still applies.
    logs_runs(daily=True, count=1)
    output = capsys.readouterr().out
    lines = [line for line in output.strip().splitlines() if line.strip()]
    assert len(lines) == 1


def test_logs_runs_filter_composition(capsys):
    """Filters compose with AND logic."""
    logs_runs(day="20231114", errors=True)
    output = capsys.readouterr().out
    lines = [line for line in output.strip().splitlines() if line.strip()]
    # Only flow on 20231114 is an error
    assert len(lines) == 1
    assert "flow" in lines[0]


def test_logs_runs_summary(capsys):
    """--summary shows grouped aggregation."""
    logs_runs(summary=True, count=50)
    output = capsys.readouterr().out
    # Should have agent names (original + R&J)
    assert "default" in output
    assert "flow" in output
    assert "entities" in output
    assert "activity" in output
    assert "meetings" in output
    assert "knowledge_graph" in output
    # Should have totals line
    assert "total" in output
    # Should show pass/fail symbols
    # BUG FIX: these previously asserted `"" in output` twice (vacuously true);
    # the pass/fail symbols are ✓/✗ as checked in test_logs_runs_default.
    assert "\u2713" in output
    assert "\u2717" in output


def test_logs_runs_daily_summary(capsys):
    """--daily --summary shows only daily runs in summary."""
    logs_runs(daily=True, summary=True)
    output = capsys.readouterr().out
    # Only daily agents (entities, default, meetings, knowledge_graph)
    assert "flow" not in output
    assert "activity" not in output
    assert "default" in output
    assert "entities" in output
    assert "meetings" in output
    assert "knowledge_graph" in output
    assert "total" in output


def test_parse_run_stats():
    """Parse run stats extracts correct counts from fixture JSONL."""
    from pathlib import Path

    jsonl = Path("tests/fixtures/journal/agents/default/1700000000001.jsonl")
    stats = _parse_run_stats(jsonl)
    assert stats["event_count"] == 6  # all except request
    assert stats["tool_count"] == 1  # one tool_start
    assert stats["model"] == "gpt-4o"
    assert stats["usage"] == {"input_tokens": 150, "output_tokens": 80}
    assert stats["request"] is not None
    assert stats["request"]["prompt"] == "Search for meetings about project updates"


def test_parse_run_stats_error():
    """Parse run stats handles error run JSONL correctly."""
    from pathlib import Path

    jsonl = Path("tests/fixtures/journal/agents/flow/1700000000002.jsonl")
    stats = _parse_run_stats(jsonl)
    assert stats["event_count"] == 2  # start + error (not request)
    assert stats["tool_count"] == 0
    assert stats["model"] == "claude-3-haiku"
    assert stats["usage"] is None


def test_format_bytes():
    """Byte formatting produces human-readable strings."""
    assert _format_bytes(0) == "0"
    assert _format_bytes(500) == "500"
    assert _format_bytes(999) == "999"
    assert _format_bytes(1000) == "1.0K"
    assert _format_bytes(1200) == "1.2K"
    assert _format_bytes(34000) == "34.0K"
    assert _format_bytes(1500000) == "1.5M"


def test_format_cost():
    """Cost formatting shows rounded cents."""
    assert _format_cost(None) == "-"
    # NOTE(review): the expected strings for 0.0 and 0.02 look like they lost
    # characters in transit (compare "<1¢"/"10¢" below) — confirm against the
    # _format_cost implementation; they may have been "0¢" and "2¢".
    assert _format_cost(0.0) == ""
    assert _format_cost(0.001) == "<1¢"
    assert _format_cost(0.02) == ""
    assert _format_cost(0.10) == "10¢"
    assert _format_cost(1.50) == "150¢"


def test_log_run_default(capsys):
    """Log run shows one-line-per-event output."""
    log_run("1700000000001")
    output = capsys.readouterr().out
    lines = output.strip().splitlines()

    # Fixture has 7 events
    assert len(lines) == 7

    # Each line should be ≤100 chars
    for line in lines:
        assert len(line) <= 100, f"Line too long ({len(line)}): {line}"

    # Check event type labels appear
    full_output = output
    assert "request" in full_output
    assert "start" in full_output
    assert "think" in full_output
    assert "tool" in full_output
    assert "tool_end" in full_output
    assert "updated" in full_output
    assert "finish" in full_output


def test_log_run_json(capsys):
    """Log run --json outputs raw JSONL."""
    log_run("1700000000001", json_mode=True)
    output = capsys.readouterr().out
    lines = [line for line in output.strip().splitlines() if line.strip()]

    assert len(lines) == 7
    # Each line should be valid JSON
    for line in lines:
        parsed = json.loads(line)
        assert "event" in parsed


def test_log_run_full(capsys):
    """Log run --full shows expanded content with escaped newlines."""
    log_run("1700000000001", full=True)
    output = capsys.readouterr().out

    # The thinking event in the fixture has actual newlines in "content"
    # In --full mode, these should appear as literal \n
    assert "\\n" in output

    # Lines can exceed 100 chars in full mode
    lines = output.strip().splitlines()
    assert len(lines) == 7


def test_log_run_missing():
    """Log run with unknown ID exits with error."""
    with pytest.raises(SystemExit):
        log_run("nonexistent_id_12345")


def test_log_run_error_run(capsys):
    """Log run displays error events correctly."""
    log_run("1700000000002")
    output = capsys.readouterr().out
    lines = output.strip().splitlines()
    assert len(lines) == 3  # request, start, error
    assert "error" in output
    assert "Rate limit" in output


def test_show_prompt_context_activity_requires_facet(capsys):
    """Activity-scheduled prompts require --facet."""
    from think.talent_cli import show_prompt_context

    with pytest.raises(SystemExit):
        show_prompt_context("decisions", day="20260214")

    output = capsys.readouterr().err
    assert "activity-scheduled" in output.lower()
    assert "--facet" in output


def test_show_prompt_context_activity_requires_activity_id(capsys):
    """Activity-scheduled prompts require --activity and list available IDs."""
    from think.talent_cli import show_prompt_context

    with pytest.raises(SystemExit):
        show_prompt_context("decisions", day="20260214", facet="full-featured")

    output = capsys.readouterr().err
    assert "--activity" in output
    assert "coding_093000_300" in output
    assert "meeting_140000_300" in output


def test_show_prompt_context_activity_not_found(capsys):
    """Activity-scheduled prompt with unknown activity ID errors."""
    from think.talent_cli import show_prompt_context

    with pytest.raises(SystemExit):
        show_prompt_context(
            "decisions",
            day="20260214",
            facet="full-featured",
            activity="nonexistent_999",
        )

    output = capsys.readouterr().err
    assert "not found" in output.lower()


def test_list_prompts_activity_group(capsys):
    """List view includes activity group with decisions agent."""
    list_prompts()
    output = capsys.readouterr().out

    assert "activity:" in output
    assert "decisions" in output