# personal memory agent
# at main 637 lines 24 kB view raw
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright (c) 2026 sol pbc

"""Tests for the think.cluster clustering helpers and think.utils.day_path.

Every test points the journal at a pytest ``tmp_path`` via the
``_SOLSTONE_JOURNAL_OVERRIDE`` environment variable and builds segment
directories named ``HHMMSS_<seconds>`` under ``<day>/default/``.
"""

import importlib

import pytest

from think.utils import day_path


def test_cluster(tmp_path, monkeypatch):
    """Test cluster() uses transcripts and agent output summaries (*.md files)."""
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))
    day_dir = day_path("20240101")

    mod = importlib.import_module("think.cluster")
    # Write JSONL format: metadata first, then entry in segment directory
    (day_dir / "default" / "120000_300").mkdir(parents=True)
    (day_dir / "default" / "120000_300" / "audio.jsonl").write_text(
        '{}\n{"text": "hi"}\n'
    )
    (day_dir / "default" / "120500_300").mkdir(parents=True)
    (day_dir / "default" / "120500_300" / "agents").mkdir()
    (day_dir / "default" / "120500_300" / "agents" / "screen.md").write_text(
        "screen summary"
    )
    result, counts = mod.cluster(
        "20240101", sources={"transcripts": True, "percepts": False, "agents": True}
    )
    assert counts["transcripts"] == 1
    assert counts["agents"] == 1
    assert "### Transcript" in result
    # Insight rendering emits a "### {stem} summary" header. The fixture body is
    # itself "screen summary", so assert on the full header: a bare
    # "screen summary" check would pass even if the header were missing.
    assert "### screen summary" in result


def test_cluster_range(tmp_path, monkeypatch):
    """Test cluster_range with transcripts and agents sources."""
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))
    day_dir = day_path("20240101")

    mod = importlib.import_module("think.cluster")
    # Write JSONL format: metadata first, then entry with proper start time and source in segment directory
    (day_dir / "default" / "120000_300").mkdir(parents=True)
    (day_dir / "default" / "120000_300" / "audio.jsonl").write_text(
        '{"raw": "raw.flac", "model": "whisper-1"}\n'
        '{"start": "00:00:01", "source": "mic", "text": "hi from audio"}\n'
    )
    (day_dir / "default" / "120000_300" / "agents").mkdir()
    (day_dir / "default" / "120000_300" / "agents" / "screen.md").write_text(
        "screen summary content"
    )
    # Test with agents=True to include *.md files
    md = mod.cluster_range(
        "20240101",
        "120000",
        "120100",
        sources={"transcripts": True, "percepts": False, "agents": True},
    )
    # Check that the function works and includes expected sections
    assert "### Transcript" in md
    # Insight rendering header: "### {stem} summary". Checked with the "### "
    # prefix so it is not trivially satisfied by the body text asserted below.
    assert "### screen summary" in md
    assert "screen summary content" in md


def test_cluster_scan(tmp_path, monkeypatch):
    """Test cluster_scan groups audio and screen segments into 15-minute slot ranges."""
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))
    day_dir = day_path("20240101")

    mod = importlib.import_module("think.cluster")
    # Audio transcripts at 09:01, 09:05, 09:20 and 11:00 (JSONL format with empty metadata)
    (day_dir / "default" / "090101_300").mkdir(parents=True)
    (day_dir / "default" / "090101_300" / "audio.jsonl").write_text("{}\n")
    (day_dir / "default" / "090500_300").mkdir(parents=True)
    (day_dir / "default" / "090500_300" / "audio.jsonl").write_text("{}\n")
    (day_dir / "default" / "092000_300").mkdir(parents=True)
    (day_dir / "default" / "092000_300" / "audio.jsonl").write_text("{}\n")
    (day_dir / "default" / "110000_300").mkdir(parents=True)
    (day_dir / "default" / "110000_300" / "audio.jsonl").write_text("{}\n")
    # Screen transcripts at 10:01, 10:05, 10:20 and 12:00
    (day_dir / "default" / "100101_300").mkdir(parents=True)
    (day_dir / "default" / "100101_300" / "screen.jsonl").write_text(
        '{"raw": "screen.webm"}\n'
    )
    (day_dir / "default" / "100500_300").mkdir(parents=True)
    (day_dir / "default" / "100500_300" / "screen.jsonl").write_text(
        '{"raw": "screen.webm"}\n'
    )
    (day_dir / "default" / "102000_300").mkdir(parents=True)
    (day_dir / "default" / "102000_300" / "screen.jsonl").write_text(
        '{"raw": "screen.webm"}\n'
    )
    (day_dir / "default" / "120000_300").mkdir(parents=True)
    (day_dir / "default" / "120000_300" / "screen.jsonl").write_text(
        '{"raw": "screen.webm"}\n'
    )
    audio_ranges, screen_ranges = mod.cluster_scan("20240101")
    # Expected ranges: 15-minute slot grouping (segments 09:01-09:05-09:20 group together)
    # Slots: 09:00, 09:00, 09:15 -> ranges: 09:00-09:30; 11:00 -> 11:00-11:15
    assert audio_ranges == [("09:00", "09:30"), ("11:00", "11:15")]
    assert screen_ranges == [("10:00", "10:30"), ("12:00", "12:15")]


def test_cluster_segments(tmp_path, monkeypatch):
    """Test cluster_segments returns individual segments with their types."""
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))
    day_dir = day_path("20240101")

    mod = importlib.import_module("think.cluster")

    # Create segment with duration: 090000_300 (09:00:00 for 5 minutes)
    (day_dir / "default" / "090000_300").mkdir(parents=True)
    (day_dir / "default" / "090000_300" / "audio.jsonl").write_text("{}\n")

    # Create segment with both audio and screen
    (day_dir / "default" / "100000_600").mkdir(parents=True)
    (day_dir / "default" / "100000_600" / "audio.jsonl").write_text("{}\n")
    (day_dir / "default" / "100000_600" / "screen.jsonl").write_text(
        '{"raw": "screen.webm"}\n'
    )

    # Create segment with only screen
    (day_dir / "default" / "110000_300").mkdir(parents=True)
    (day_dir / "default" / "110000_300" / "screen.jsonl").write_text(
        '{"raw": "screen.webm"}\n'
    )

    segments = mod.cluster_segments("20240101")

    assert len(segments) == 3

    # Check first segment (audio only)
    assert segments[0]["key"] == "090000_300"
    assert segments[0]["start"] == "09:00"
    assert segments[0]["end"] == "09:05"
    assert segments[0]["types"] == ["audio"]

    # Check second segment (both audio and screen)
    assert segments[1]["key"] == "100000_600"
    assert segments[1]["start"] == "10:00"
    assert segments[1]["end"] == "10:10"
    assert "audio" in segments[1]["types"]
    assert "screen" in segments[1]["types"]

    # Check third segment (screen only)
    assert segments[2]["key"] == "110000_300"
    assert segments[2]["start"] == "11:00"
    assert segments[2]["end"] == "11:05"
    assert segments[2]["types"] == ["screen"]


def test_cluster_period_uses_raw_screen(tmp_path, monkeypatch):
    """Test cluster_period uses raw screen.jsonl, not insight *.md files."""
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))
    day_dir = day_path("20240101")

    mod = importlib.import_module("think.cluster")

    # Create segment with both audio and raw screen data
    segment = day_dir / "default" / "100000_300"
    segment.mkdir(parents=True)
    (segment / "audio.jsonl").write_text(
        '{"raw": "audio.flac"}\n{"start": "00:00:01", "text": "hello"}\n'
    )
    # Raw screen.jsonl with frame analysis (what cluster_period should use)
    (segment / "screen.jsonl").write_text(
        '{"raw": "screen.webm"}\n'
        '{"timestamp": 10, "analysis": {"primary": "code_editor", '
        '"visual_description": "VS Code with Python file"}}\n'
    )
    # Also create screen.md (insight) to verify it's NOT used by cluster_period
    (segment / "agents").mkdir()
    (segment / "agents" / "screen.md").write_text("This insight should NOT appear")

    result, counts = mod.cluster_period(
        "20240101",
        "100000_300",
        sources={"transcripts": True, "percepts": True, "agents": False},
    )

    # Should have both transcript and screen entries
    assert counts["transcripts"] == 1
    assert counts["percepts"] == 1
    assert "### Transcript" in result
    # Should use raw screen format header
    assert "Screen Activity" in result
    # Raw screen content should be present
    assert "VS Code with Python file" in result
    # Insight content should NOT be present (agents=False for cluster_period)
    assert "This insight should NOT appear" not in result


def test_load_entries_from_toplevel_segment(tmp_path, monkeypatch):
    """_load_entries_from_segment resolves the day for top-level segment dirs."""
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))
    day_dir = day_path("20240101")
    # Segment sits directly under the day directory (no "default" stream level)
    segment = day_dir / "100000_300"
    segment.mkdir()

    mod = importlib.import_module("think.cluster")

    entries = mod._load_entries_from_segment(
        str(segment),
        transcripts=True,
        percepts=False,
        agents=False,
    )

    # The segment directory is empty, so nothing should be loaded
    assert entries == []


def test_cluster_range_with_agents(tmp_path, monkeypatch):
    """Test cluster_range with agents source loads all *.md files."""
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))
    day_dir = day_path("20240101")

    mod = importlib.import_module("think.cluster")

    # Create segment with multiple insight files
    segment = day_dir / "default" / "100000_300"
    segment.mkdir(parents=True)
    (segment / "agents").mkdir()
    (segment / "audio.jsonl").write_text(
        '{"raw": "audio.flac"}\n{"start": "00:00:01", "text": "hello"}\n'
    )
    (segment / "agents" / "screen.md").write_text("Screen activity summary")
    (segment / "agents" / "activity.md").write_text("Activity insight content")
    # Also create screen.jsonl to verify it's NOT used when agents=True, percepts=False
    (segment / "screen.jsonl").write_text(
        '{"raw": "screen.webm"}\n'
        '{"timestamp": 10, "analysis": {"primary": "code_editor"}}\n'
    )

    # Test agents=True returns *.md summaries, not raw screen data
    result = mod.cluster_range(
        "20240101",
        "100000",
        "100500",
        sources={"transcripts": True, "percepts": False, "agents": True},
    )

    assert "### Transcript" in result
    # Should include both .md files as agent outputs
    assert "### screen summary" in result
    assert "Screen activity summary" in result
    assert "### activity summary" in result
    assert "Activity insight content" in result
    # Should NOT include raw screen data
    assert "code_editor" not in result


def test_cluster_range_with_screen(tmp_path, monkeypatch):
    """Test cluster_range with percepts source loads raw screen.jsonl data."""
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))
    day_dir = day_path("20240101")

    mod = importlib.import_module("think.cluster")

    # Create segment with raw screen data and insight file
    segment = day_dir / "default" / "100000_300"
    segment.mkdir(parents=True)
    (segment / "agents").mkdir()
    (segment / "screen.jsonl").write_text(
        '{"raw": "screen.webm"}\n'
        '{"timestamp": 10, "analysis": {"primary": "code_editor"}}\n'
    )
    (segment / "agents" / "screen.md").write_text("Screen summary insight")

    # Test percepts=True returns raw screen data, not agent outputs
    result = mod.cluster_range(
        "20240101",
        "100000",
        "100500",
        sources={"transcripts": False, "percepts": True, "agents": False},
    )

    assert "Screen Activity" in result
    assert "code_editor" in result
    # Should NOT include insight content
    assert "Screen summary insight" not in result
    assert "### screen summary" not in result


def test_cluster_range_with_multiple_screen_files(tmp_path, monkeypatch):
    """Test cluster_range loads multiple *_screen.jsonl files per segment."""
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))
    day_dir = day_path("20240101")

    mod = importlib.import_module("think.cluster")

    # Create segment with multiple screen files (like multi-monitor setup)
    segment = day_dir / "default" / "100000_300"
    segment.mkdir(parents=True)
    (segment / "screen.jsonl").write_text(
        '{"raw": "screen.webm"}\n'
        '{"timestamp": 10, "analysis": {"primary": "code_editor", '
        '"visual_description": "Primary monitor with VS Code"}}\n'
    )
    (segment / "monitor_2_screen.jsonl").write_text(
        '{"raw": "monitor_2.webm"}\n'
        '{"timestamp": 10, "analysis": {"primary": "browser", '
        '"visual_description": "Secondary monitor with documentation"}}\n'
    )

    # Test percepts=True returns data from both screen files
    result = mod.cluster_range(
        "20240101",
        "100000",
        "100500",
        sources={"transcripts": False, "percepts": True, "agents": False},
    )

    # Should include content from both screen files
    assert "Primary monitor with VS Code" in result
    assert "Secondary monitor with documentation" in result


def test_cluster_scan_with_split_screen(tmp_path, monkeypatch):
    """Test cluster_scan detects *_screen.jsonl files."""
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))
    day_dir = day_path("20240101")

    mod = importlib.import_module("think.cluster")

    # Create segment with only *_screen.jsonl (no screen.jsonl)
    (day_dir / "default" / "100000_300").mkdir(parents=True)
    (day_dir / "default" / "100000_300" / "monitor_1_screen.jsonl").write_text(
        '{"raw": "m1.webm"}\n'
    )

    audio_ranges, screen_ranges = mod.cluster_scan("20240101")

    # Should detect the segment as having screen content (15-minute slot grouping)
    assert screen_ranges == [("10:00", "10:15")]


def test_cluster_segments_with_split_screen(tmp_path, monkeypatch):
    """Test cluster_segments detects *_screen.jsonl files."""
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))
    day_dir = day_path("20240101")

    mod = importlib.import_module("think.cluster")

    # Create segment with only *_screen.jsonl (no screen.jsonl)
    (day_dir / "default" / "100000_300").mkdir(parents=True)
    (day_dir / "default" / "100000_300" / "wayland_screen.jsonl").write_text(
        '{"raw": "w.webm"}\n'
    )

    segments = mod.cluster_segments("20240101")

    assert len(segments) == 1
    assert segments[0]["key"] == "100000_300"
    assert "screen" in segments[0]["types"]


def test_cluster_span(tmp_path, monkeypatch):
    """Test cluster_span processes a span of segments."""
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))
    day_dir = day_path("20240101")

    mod = importlib.import_module("think.cluster")

    # Create three segments with different content
    (day_dir / "default" / "090000_300").mkdir(parents=True)
    (day_dir / "default" / "090000_300" / "audio.jsonl").write_text(
        '{"raw": "audio.flac"}\n{"start": "00:00:01", "text": "morning segment"}\n'
    )

    (day_dir / "default" / "100000_300").mkdir(parents=True)
    (day_dir / "default" / "100000_300" / "audio.jsonl").write_text(
        '{"raw": "audio.flac"}\n{"start": "00:00:01", "text": "mid-morning segment"}\n'
    )
    (day_dir / "default" / "100000_300" / "screen.jsonl").write_text(
        '{"raw": "screen.webm"}\n'
        '{"timestamp": 10, "analysis": {"primary": "code_editor"}}\n'
    )

    (day_dir / "default" / "110000_300").mkdir(parents=True)
    (day_dir / "default" / "110000_300" / "audio.jsonl").write_text(
        '{"raw": "audio.flac"}\n{"start": "00:00:01", "text": "late morning segment"}\n'
    )

    # Process only first and third segments as a span (audio only, no screen)
    result, counts = mod.cluster_span(
        "20240101",
        ["090000_300", "110000_300"],
        sources={"transcripts": True, "percepts": False, "agents": False},
    )

    # Should have 2 transcript entries (one per segment)
    assert counts["transcripts"] == 2
    assert counts["percepts"] == 0
    # NOTE: "morning segment" is also a substring of "late morning segment", so
    # the count assertion above is what proves both segments were loaded.
    assert "morning segment" in result
    assert "late morning segment" in result
    # Should NOT include the skipped segment
    assert "mid-morning segment" not in result
    assert "code_editor" not in result


def test_cluster_span_missing_segment(tmp_path, monkeypatch):
    """Test cluster_span fails fast when segment is missing."""
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))
    day_dir = day_path("20240101")

    mod = importlib.import_module("think.cluster")

    # Create only one segment
    (day_dir / "default" / "090000_300").mkdir(parents=True)
    (day_dir / "default" / "090000_300" / "audio.jsonl").write_text(
        '{"raw": "audio.flac"}\n'
    )

    # Try to process existing and non-existing segments
    with pytest.raises(ValueError) as exc_info:
        mod.cluster_span(
            "20240101",
            ["090000_300", "100000_300"],
            sources={"transcripts": True, "percepts": False, "agents": False},
        )

    # The error should name the missing segment
    assert "100000_300" in str(exc_info.value)
    assert "not found" in str(exc_info.value)


def test_cluster_with_agent_filter_dict(tmp_path, monkeypatch):
    """Test cluster() with dict-valued agents source for selective filtering."""
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))
    day_dir = day_path("20240101")

    mod = importlib.import_module("think.cluster")

    # Create segment with multiple agent output files
    segment = day_dir / "default" / "120000_300"
    segment.mkdir(parents=True)
    (segment / "agents").mkdir()
    (segment / "audio.jsonl").write_text('{}\n{"text": "hello"}\n')
    (segment / "agents" / "entities.md").write_text("Entity extraction results")
    (segment / "agents" / "meetings.md").write_text("Meeting summary results")
    (segment / "agents" / "flow.md").write_text("Flow analysis results")

    # Test filtering to only include entities
    result, counts = mod.cluster(
        "20240101",
        sources={"transcripts": True, "percepts": False, "agents": {"entities": True}},
    )

    assert counts["transcripts"] == 1
    assert counts["agents"] == 1  # Only entities should be counted
    assert "Entity extraction results" in result
    assert "Meeting summary results" not in result
    assert "Flow analysis results" not in result


def test_cluster_with_agent_filter_multiple(tmp_path, monkeypatch):
    """Test cluster() with dict selecting multiple agents."""
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))
    day_dir = day_path("20240101")

    mod = importlib.import_module("think.cluster")

    # Create segment with multiple agent output files
    segment = day_dir / "default" / "120000_300"
    segment.mkdir(parents=True)
    (segment / "agents").mkdir()
    (segment / "audio.jsonl").write_text('{}\n{"text": "hello"}\n')
    (segment / "agents" / "entities.md").write_text("Entity extraction results")
    (segment / "agents" / "meetings.md").write_text("Meeting summary results")
    (segment / "agents" / "flow.md").write_text("Flow analysis results")

    # Test filtering to include entities and meetings but not flow
    result, counts = mod.cluster(
        "20240101",
        sources={
            "transcripts": True,
            "percepts": False,
            "agents": {"entities": True, "meetings": "required", "flow": False},
        },
    )

    assert counts["transcripts"] == 1
    assert counts["agents"] == 2  # entities + meetings
    assert "Entity extraction results" in result
    assert "Meeting summary results" in result
    assert "Flow analysis results" not in result


def test_cluster_with_agent_filter_app_namespaced(tmp_path, monkeypatch):
    """Test cluster() with dict filtering app-namespaced agent outputs."""
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))
    day_dir = day_path("20240101")

    mod = importlib.import_module("think.cluster")

    # Create segment with app-namespaced agent output files
    # App agent output naming: "app:agent" -> "_app_agent.md"
    segment = day_dir / "default" / "120000_300"
    segment.mkdir(parents=True)
    (segment / "agents").mkdir()
    (segment / "audio.jsonl").write_text('{}\n{"text": "hello"}\n')
    (segment / "agents" / "entities.md").write_text("System entity results")
    (segment / "agents" / "_todos_review.md").write_text("Todos review results")

    # Test filtering to include app-namespaced agent
    result, counts = mod.cluster(
        "20240101",
        sources={
            "transcripts": True,
            "percepts": False,
            "agents": {"entities": False, "todos:review": True},
        },
    )

    assert counts["transcripts"] == 1
    assert counts["agents"] == 1  # Only todos:review
    assert "System entity results" not in result
    assert "Todos review results" in result


def test_cluster_with_empty_agent_filter(tmp_path, monkeypatch):
    """Test cluster() with empty dict means no agents."""
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))
    day_dir = day_path("20240101")

    mod = importlib.import_module("think.cluster")

    segment = day_dir / "default" / "120000_300"
    segment.mkdir(parents=True)
    (segment / "agents").mkdir()
    (segment / "audio.jsonl").write_text('{}\n{"text": "hello"}\n')
    (segment / "agents" / "entities.md").write_text("Entity extraction results")

    # Empty dict should mean no agents
    result, counts = mod.cluster(
        "20240101",
        sources={"transcripts": True, "percepts": False, "agents": {}},
    )

    assert counts["transcripts"] == 1
    assert counts["agents"] == 0
    assert "Entity extraction results" not in result


def test_filename_to_agent_key():
    """Test _filename_to_agent_key conversion."""
    from think.cluster import _filename_to_agent_key

    # System agents
    assert _filename_to_agent_key("entities") == "entities"
    assert _filename_to_agent_key("flow") == "flow"

    # App-namespaced agents
    assert _filename_to_agent_key("_todos_review") == "todos:review"
    assert _filename_to_agent_key("_entities_observer") == "entities:observer"

    # Edge case: single underscore component
    assert _filename_to_agent_key("_app") == "_app"  # No second part, returns as-is


def test_agent_matches_filter():
    """Test _agent_matches_filter logic."""
    from think.cluster import _agent_matches_filter

    # None filter means all agents
    assert _agent_matches_filter("entities", None) is True
    assert _agent_matches_filter("_todos_review", None) is True

    # Empty dict means no agents
    assert _agent_matches_filter("entities", {}) is False
    assert _agent_matches_filter("_todos_review", {}) is False

    # Specific filtering
    filter_dict = {"entities": True, "meetings": False, "todos:review": "required"}
    assert _agent_matches_filter("entities", filter_dict) is True
    assert _agent_matches_filter("meetings", filter_dict) is False
    assert _agent_matches_filter("_todos_review", filter_dict) is True
    assert _agent_matches_filter("flow", filter_dict) is False  # Not in filter


def test_scan_day_combined(tmp_path, monkeypatch):
    """Test scan_day returns the same ranges and segments as cluster_scan and cluster_segments."""
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))
    day_dir = day_path("20240101")

    mod = importlib.import_module("think.cluster")

    # First segment has both audio and screen data
    first = day_dir / "default" / "090000_300"
    first.mkdir(parents=True)
    (first / "audio.jsonl").write_text("{}\n")
    (first / "screen.jsonl").write_text('{"raw": "screen.webm"}\n')

    # Second segment is audio only
    second = day_dir / "default" / "093000_300"
    second.mkdir(parents=True)
    (second / "audio.jsonl").write_text("{}\n")

    audio_ranges, screen_ranges, segments = mod.scan_day("20240101")
    expected_ranges = mod.cluster_scan("20240101")
    expected_segments = mod.cluster_segments("20240101")

    assert audio_ranges == [("09:00", "09:15"), ("09:30", "09:45")]
    assert screen_ranges == [("09:00", "09:15")]
    assert segments == [
        {
            "key": "090000_300",
            "start": "09:00",
            "end": "09:05",
            "types": ["audio", "screen"],
            "stream": "default",
        },
        {
            "key": "093000_300",
            "start": "09:30",
            "end": "09:35",
            "types": ["audio"],
            "stream": "default",
        },
    ]
    # The combined scan must agree with the two single-purpose scans
    assert (audio_ranges, screen_ranges) == expected_ranges
    assert segments == expected_segments


def test_scan_day_empty(tmp_path, monkeypatch):
    """Test scan_day returns three empty lists for a day with no data."""
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))

    mod = importlib.import_module("think.cluster")

    assert mod.scan_day("20250101") == ([], [], [])


def test_day_path_create_false(tmp_path, monkeypatch):
    """Test day_path(create=False) leaves the directory absent; the default call creates it."""
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))

    missing = day_path("29990101", create=False)
    assert not missing.exists()

    created = day_path("29990101")
    assert created.exists()