tests/test_cluster.py at main · solpbc.org/solstone

solpbc.org / solstone
fork atom
personal memory agent
fork atom
solstone / tests / test_cluster.py
at main 637 lines 24 kB view raw
wrap content
Jer Miller feat: consolidate transcripts dual traversal into single scan_day() 2d ago
cc8fed8f
  1# SPDX-License-Identifier: AGPL-3.0-only
  2# Copyright (c) 2026 sol pbc
  3
  4import importlib
  5
  6import pytest
  7
  8from think.utils import day_path
  9
 10
 11def test_cluster(tmp_path, monkeypatch):
 12    """Test cluster() uses transcripts and agent output summaries (*.md files)."""
 13    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))
 14    day_dir = day_path("20240101")
 15
 16    mod = importlib.import_module("think.cluster")
 17    # Write JSONL format: metadata first, then entry in segment directory
 18    (day_dir / "default" / "120000_300").mkdir(parents=True)
 19    (day_dir / "default" / "120000_300" / "audio.jsonl").write_text(
 20        '{}\n{"text": "hi"}\n'
 21    )
 22    (day_dir / "default" / "120500_300").mkdir(parents=True)
 23    (day_dir / "default" / "120500_300" / "agents").mkdir()
 24    (day_dir / "default" / "120500_300" / "agents" / "screen.md").write_text(
 25        "screen summary"
 26    )
 27    result, counts = mod.cluster(
 28        "20240101", sources={"transcripts": True, "percepts": False, "agents": True}
 29    )
 30    assert counts["transcripts"] == 1
 31    assert counts["agents"] == 1
 32    assert "### Transcript" in result
 33    # Now uses insight rendering: "### {stem} summary"
 34    assert "screen summary" in result
 35
 36
 37def test_cluster_range(tmp_path, monkeypatch):
 38    """Test cluster_range with transcripts and agents sources."""
 39    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))
 40    day_dir = day_path("20240101")
 41
 42    mod = importlib.import_module("think.cluster")
 43    # Write JSONL format: metadata first, then entry with proper start time and source in segment directory
 44    (day_dir / "default" / "120000_300").mkdir(parents=True)
 45    (day_dir / "default" / "120000_300" / "audio.jsonl").write_text(
 46        '{"raw": "raw.flac", "model": "whisper-1"}\n'
 47        '{"start": "00:00:01", "source": "mic", "text": "hi from audio"}\n'
 48    )
 49    (day_dir / "default" / "120000_300" / "agents").mkdir()
 50    (day_dir / "default" / "120000_300" / "agents" / "screen.md").write_text(
 51        "screen summary content"
 52    )
 53    # Test with agents=True to include *.md files
 54    md = mod.cluster_range(
 55        "20240101",
 56        "120000",
 57        "120100",
 58        sources={"transcripts": True, "percepts": False, "agents": True},
 59    )
 60    # Check that the function works and includes expected sections
 61    assert "### Transcript" in md
 62    # Now uses insight rendering: "### {stem} summary"
 63    assert "screen summary" in md
 64    assert "screen summary content" in md
 65
 66
 67def test_cluster_scan(tmp_path, monkeypatch):
 68    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))
 69    day_dir = day_path("20240101")
 70
 71    mod = importlib.import_module("think.cluster")
 72    # Audio transcripts at 09:01, 09:05, 09:20 and 11:00 (JSONL format with empty metadata)
 73    (day_dir / "default" / "090101_300").mkdir(parents=True)
 74    (day_dir / "default" / "090101_300" / "audio.jsonl").write_text("{}\n")
 75    (day_dir / "default" / "090500_300").mkdir(parents=True)
 76    (day_dir / "default" / "090500_300" / "audio.jsonl").write_text("{}\n")
 77    (day_dir / "default" / "092000_300").mkdir(parents=True)
 78    (day_dir / "default" / "092000_300" / "audio.jsonl").write_text("{}\n")
 79    (day_dir / "default" / "110000_300").mkdir(parents=True)
 80    (day_dir / "default" / "110000_300" / "audio.jsonl").write_text("{}\n")
 81    # Screen transcripts at 10:01, 10:05, 10:20 and 12:00
 82    (day_dir / "default" / "100101_300").mkdir(parents=True)
 83    (day_dir / "default" / "100101_300" / "screen.jsonl").write_text(
 84        '{"raw": "screen.webm"}\n'
 85    )
 86    (day_dir / "default" / "100500_300").mkdir(parents=True)
 87    (day_dir / "default" / "100500_300" / "screen.jsonl").write_text(
 88        '{"raw": "screen.webm"}\n'
 89    )
 90    (day_dir / "default" / "102000_300").mkdir(parents=True)
 91    (day_dir / "default" / "102000_300" / "screen.jsonl").write_text(
 92        '{"raw": "screen.webm"}\n'
 93    )
 94    (day_dir / "default" / "120000_300").mkdir(parents=True)
 95    (day_dir / "default" / "120000_300" / "screen.jsonl").write_text(
 96        '{"raw": "screen.webm"}\n'
 97    )
 98    audio_ranges, screen_ranges = mod.cluster_scan("20240101")
 99    # Expected ranges: 15-minute slot grouping (segments 09:01-09:05-09:20 group together)
100    # Slots: 09:00, 09:00, 09:15 -> ranges: 09:00-09:30; 11:00 -> 11:00-11:15
101    assert audio_ranges == [("09:00", "09:30"), ("11:00", "11:15")]
102    assert screen_ranges == [("10:00", "10:30"), ("12:00", "12:15")]
103
104
def test_cluster_segments(tmp_path, monkeypatch):
    """cluster_segments() lists each segment with its key, times, and types."""
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))
    stream_dir = day_path("20240101") / "default"

    mod = importlib.import_module("think.cluster")

    screen_meta = '{"raw": "screen.webm"}\n'

    # 09:00:00 for 5 minutes, audio only.
    audio_only = stream_dir / "090000_300"
    audio_only.mkdir(parents=True)
    (audio_only / "audio.jsonl").write_text("{}\n")

    # 10:00:00 for 10 minutes, audio and screen together.
    mixed = stream_dir / "100000_600"
    mixed.mkdir(parents=True)
    (mixed / "audio.jsonl").write_text("{}\n")
    (mixed / "screen.jsonl").write_text(screen_meta)

    # 11:00:00 for 5 minutes, screen only.
    screen_only = stream_dir / "110000_300"
    screen_only.mkdir(parents=True)
    (screen_only / "screen.jsonl").write_text(screen_meta)

    segments = mod.cluster_segments("20240101")

    assert len(segments) == 3
    first, second, third = segments

    # Audio-only segment.
    assert first["key"] == "090000_300"
    assert first["start"] == "09:00"
    assert first["end"] == "09:05"
    assert first["types"] == ["audio"]

    # Mixed segment: both types present, order not pinned.
    assert second["key"] == "100000_600"
    assert second["start"] == "10:00"
    assert second["end"] == "10:10"
    assert "audio" in second["types"]
    assert "screen" in second["types"]

    # Screen-only segment.
    assert third["key"] == "110000_300"
    assert third["start"] == "11:00"
    assert third["end"] == "11:05"
    assert third["types"] == ["screen"]
151
152
def test_cluster_period_uses_raw_screen(tmp_path, monkeypatch):
    """cluster_period() with percepts=True reads screen.jsonl, not agents/*.md."""
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))
    day_dir = day_path("20240101")

    mod = importlib.import_module("think.cluster")

    segment_key = "100000_300"
    segment = day_dir / "default" / segment_key
    segment.mkdir(parents=True)

    # Audio transcript: metadata line plus one spoken entry.
    (segment / "audio.jsonl").write_text(
        '{"raw": "audio.flac"}\n'
        '{"start": "00:00:01", "text": "hello"}\n'
    )
    # Raw frame analysis; this is the data the result is expected to contain.
    (segment / "screen.jsonl").write_text(
        '{"raw": "screen.webm"}\n'
        '{"timestamp": 10, "analysis": {"primary": "code_editor", '
        '"visual_description": "VS Code with Python file"}}\n'
    )
    # Decoy agent output that must stay out of the result.
    agents_dir = segment / "agents"
    agents_dir.mkdir()
    (agents_dir / "screen.md").write_text("This insight should NOT appear")

    wanted = {"transcripts": True, "percepts": True, "agents": False}
    result, counts = mod.cluster_period("20240101", segment_key, sources=wanted)

    # One entry from each enabled source.
    assert counts["transcripts"] == 1
    assert counts["percepts"] == 1
    assert "### Transcript" in result
    # Raw screen rendering: its header and the frame description.
    assert "Screen Activity" in result
    assert "VS Code with Python file" in result
    # agents=False keeps the markdown output out.
    assert "This insight should NOT appear" not in result
192
193
def test_load_entries_from_toplevel_segment(tmp_path, monkeypatch):
    """_load_entries_from_segment copes with a segment placed directly in the day dir."""
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))

    # Empty segment directly under the day directory (no stream level).
    toplevel_segment = day_path("20240101") / "100000_300"
    toplevel_segment.mkdir()

    mod = importlib.import_module("think.cluster")

    loaded = mod._load_entries_from_segment(
        str(toplevel_segment),
        transcripts=True,
        percepts=False,
        agents=False,
    )

    # The segment holds no files, so nothing is loaded.
    assert loaded == []
211
212
def test_cluster_range_with_agents(tmp_path, monkeypatch):
    """cluster_range() with agents=True includes every agents/*.md file."""
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))
    day_dir = day_path("20240101")

    mod = importlib.import_module("think.cluster")

    segment = day_dir / "default" / "100000_300"
    agents_dir = segment / "agents"
    agents_dir.mkdir(parents=True)

    (segment / "audio.jsonl").write_text(
        '{"raw": "audio.flac"}\n'
        '{"start": "00:00:01", "text": "hello"}\n'
    )
    # Two agent outputs; both are expected in the result.
    (agents_dir / "screen.md").write_text("Screen activity summary")
    (agents_dir / "activity.md").write_text("Activity insight content")
    # Raw screen data that must be ignored while percepts=False.
    (segment / "screen.jsonl").write_text(
        '{"raw": "screen.webm"}\n'
        '{"timestamp": 10, "analysis": {"primary": "code_editor"}}\n'
    )

    wanted = {"transcripts": True, "percepts": False, "agents": True}
    result = mod.cluster_range("20240101", "100000", "100500", sources=wanted)

    assert "### Transcript" in result
    # Each markdown file gets its own "### {stem} summary" header plus body.
    assert "### screen summary" in result
    assert "Screen activity summary" in result
    assert "### activity summary" in result
    assert "Activity insight content" in result
    # Nothing from screen.jsonl leaks through.
    assert "code_editor" not in result
251
252
def test_cluster_range_with_screen(tmp_path, monkeypatch):
    """cluster_range() with percepts=True renders raw screen.jsonl, not agent output."""
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))
    day_dir = day_path("20240101")

    mod = importlib.import_module("think.cluster")

    segment = day_dir / "default" / "100000_300"
    agents_dir = segment / "agents"
    agents_dir.mkdir(parents=True)

    # Raw frame analysis that should be rendered.
    (segment / "screen.jsonl").write_text(
        '{"raw": "screen.webm"}\n'
        '{"timestamp": 10, "analysis": {"primary": "code_editor"}}\n'
    )
    # Agent output that should be ignored while agents=False.
    (agents_dir / "screen.md").write_text("Screen summary insight")

    wanted = {"transcripts": False, "percepts": True, "agents": False}
    result = mod.cluster_range("20240101", "100000", "100500", sources=wanted)

    assert "Screen Activity" in result
    assert "code_editor" in result
    # Neither the agent body nor its header may appear.
    assert "Screen summary insight" not in result
    assert "### screen summary" not in result
283
284
def test_cluster_range_with_multiple_screen_files(tmp_path, monkeypatch):
    """cluster_range() reads screen.jsonl and any *_screen.jsonl in a segment."""
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))
    day_dir = day_path("20240101")

    mod = importlib.import_module("think.cluster")

    # One segment with two screen files, as a multi-monitor setup would produce.
    segment = day_dir / "default" / "100000_300"
    segment.mkdir(parents=True)
    primary_file = segment / "screen.jsonl"
    secondary_file = segment / "monitor_2_screen.jsonl"
    primary_file.write_text(
        '{"raw": "screen.webm"}\n'
        '{"timestamp": 10, "analysis": {"primary": "code_editor", '
        '"visual_description": "Primary monitor with VS Code"}}\n'
    )
    secondary_file.write_text(
        '{"raw": "monitor_2.webm"}\n'
        '{"timestamp": 10, "analysis": {"primary": "browser", '
        '"visual_description": "Secondary monitor with documentation"}}\n'
    )

    wanted = {"transcripts": False, "percepts": True, "agents": False}
    result = mod.cluster_range("20240101", "100000", "100500", sources=wanted)

    # Descriptions from both files must be present.
    assert "Primary monitor with VS Code" in result
    assert "Secondary monitor with documentation" in result
317
318
def test_cluster_scan_with_split_screen(tmp_path, monkeypatch):
    """cluster_scan() counts a segment holding only a *_screen.jsonl file as screen."""
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))
    day_dir = day_path("20240101")

    mod = importlib.import_module("think.cluster")

    # The segment has a prefixed screen file and no plain screen.jsonl.
    segment = day_dir / "default" / "100000_300"
    segment.mkdir(parents=True)
    (segment / "monitor_1_screen.jsonl").write_text('{"raw": "m1.webm"}\n')

    _audio_ranges, screen_ranges = mod.cluster_scan("20240101")

    # The 10:00 segment is reported as one 15-minute screen slot.
    assert screen_ranges == [("10:00", "10:15")]
336
337
def test_cluster_segments_with_split_screen(tmp_path, monkeypatch):
    """cluster_segments() tags a segment as screen when only *_screen.jsonl exists."""
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))
    day_dir = day_path("20240101")

    mod = importlib.import_module("think.cluster")

    # The segment has a prefixed screen file and no plain screen.jsonl.
    segment = day_dir / "default" / "100000_300"
    segment.mkdir(parents=True)
    (segment / "wayland_screen.jsonl").write_text('{"raw": "w.webm"}\n')

    segments = mod.cluster_segments("20240101")

    assert len(segments) == 1
    only = segments[0]
    assert only["key"] == "100000_300"
    assert "screen" in only["types"]
356
357
def test_cluster_span(tmp_path, monkeypatch):
    """cluster_span() processes exactly the listed segments and skips the rest.

    The three transcript texts are chosen so that none is a substring of
    another. That way each content assertion identifies one specific segment
    rather than being satisfied by a different segment's text.
    """
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))
    stream_dir = day_path("20240101") / "default"

    mod = importlib.import_module("think.cluster")

    def write_audio(segment_key, text):
        # Metadata line first, then a single transcript entry.
        segment = stream_dir / segment_key
        segment.mkdir(parents=True)
        (segment / "audio.jsonl").write_text(
            '{"raw": "audio.flac"}\n{"start": "00:00:01", "text": "%s"}\n' % text
        )
        return segment

    write_audio("090000_300", "dawn standup notes")
    skipped = write_audio("100000_300", "skipped design review")
    # The skipped segment also carries screen data that must not leak through.
    (skipped / "screen.jsonl").write_text(
        '{"raw": "screen.webm"}\n'
        '{"timestamp": 10, "analysis": {"primary": "code_editor"}}\n'
    )
    write_audio("110000_300", "pre-lunch retro recap")

    # Span covers the first and third segments only (transcripts, no percepts).
    result, counts = mod.cluster_span(
        "20240101",
        ["090000_300", "110000_300"],
        sources={"transcripts": True, "percepts": False, "agents": False},
    )

    # One transcript entry per requested segment.
    assert counts["transcripts"] == 2
    assert counts["percepts"] == 0
    assert "dawn standup notes" in result
    assert "pre-lunch retro recap" in result
    # Nothing from the segment that was left out of the span.
    assert "skipped design review" not in result
    assert "code_editor" not in result
400
401
def test_cluster_span_missing_segment(tmp_path, monkeypatch):
    """cluster_span() raises ValueError naming a segment that does not exist."""
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))
    day_dir = day_path("20240101")

    mod = importlib.import_module("think.cluster")

    # Only the 09:00 segment is on disk.
    present = day_dir / "default" / "090000_300"
    present.mkdir(parents=True)
    (present / "audio.jsonl").write_text('{"raw": "audio.flac"}\n')

    requested = ["090000_300", "100000_300"]
    wanted = {"transcripts": True, "percepts": False, "agents": False}

    # The second key has no directory, so the call must fail.
    with pytest.raises(ValueError) as exc_info:
        mod.cluster_span("20240101", requested, sources=wanted)

    message = str(exc_info.value)
    assert "100000_300" in message
    assert "not found" in message
425
426
def test_cluster_with_agent_filter_dict(tmp_path, monkeypatch):
    """cluster() accepts a dict for "agents" and keeps only the selected outputs."""
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))
    day_dir = day_path("20240101")

    mod = importlib.import_module("think.cluster")

    segment = day_dir / "default" / "120000_300"
    agents_dir = segment / "agents"
    agents_dir.mkdir(parents=True)
    (segment / "audio.jsonl").write_text('{}\n{"text": "hello"}\n')

    # Three agent outputs are available; the filter below picks one.
    agent_outputs = {
        "entities.md": "Entity extraction results",
        "meetings.md": "Meeting summary results",
        "flow.md": "Flow analysis results",
    }
    for filename, body in agent_outputs.items():
        (agents_dir / filename).write_text(body)

    wanted = {"transcripts": True, "percepts": False, "agents": {"entities": True}}
    result, counts = mod.cluster("20240101", sources=wanted)

    assert counts["transcripts"] == 1
    # Only the entities output is counted and rendered.
    assert counts["agents"] == 1
    assert "Entity extraction results" in result
    assert "Meeting summary results" not in result
    assert "Flow analysis results" not in result
454
455
def test_cluster_with_agent_filter_multiple(tmp_path, monkeypatch):
    """cluster() honours a dict filter that enables several agents at once."""
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))
    day_dir = day_path("20240101")

    mod = importlib.import_module("think.cluster")

    segment = day_dir / "default" / "120000_300"
    agents_dir = segment / "agents"
    agents_dir.mkdir(parents=True)
    (segment / "audio.jsonl").write_text('{}\n{"text": "hello"}\n')

    agent_outputs = {
        "entities.md": "Entity extraction results",
        "meetings.md": "Meeting summary results",
        "flow.md": "Flow analysis results",
    }
    for filename, body in agent_outputs.items():
        (agents_dir / filename).write_text(body)

    # True and "required" both select an agent; False excludes it.
    agent_filter = {"entities": True, "meetings": "required", "flow": False}
    wanted = {"transcripts": True, "percepts": False, "agents": agent_filter}
    result, counts = mod.cluster("20240101", sources=wanted)

    assert counts["transcripts"] == 1
    # entities and meetings are kept, flow is dropped.
    assert counts["agents"] == 2
    assert "Entity extraction results" in result
    assert "Meeting summary results" in result
    assert "Flow analysis results" not in result
487
488
def test_cluster_with_agent_filter_app_namespaced(tmp_path, monkeypatch):
    """cluster() matches an "app:agent" filter key to its "_app_agent.md" file."""
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))
    day_dir = day_path("20240101")

    mod = importlib.import_module("think.cluster")

    segment = day_dir / "default" / "120000_300"
    agents_dir = segment / "agents"
    agents_dir.mkdir(parents=True)
    (segment / "audio.jsonl").write_text('{}\n{"text": "hello"}\n')

    # One system agent output and one app-namespaced output
    # ("todos:review" is stored as "_todos_review.md").
    (agents_dir / "entities.md").write_text("System entity results")
    (agents_dir / "_todos_review.md").write_text("Todos review results")

    agent_filter = {"entities": False, "todos:review": True}
    wanted = {"transcripts": True, "percepts": False, "agents": agent_filter}
    result, counts = mod.cluster("20240101", sources=wanted)

    assert counts["transcripts"] == 1
    # Only the todos:review output is selected.
    assert counts["agents"] == 1
    assert "System entity results" not in result
    assert "Todos review results" in result
519
520
def test_cluster_with_empty_agent_filter(tmp_path, monkeypatch):
    """cluster() treats an empty "agents" dict as selecting no agents."""
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))
    day_dir = day_path("20240101")

    mod = importlib.import_module("think.cluster")

    segment = day_dir / "default" / "120000_300"
    agents_dir = segment / "agents"
    agents_dir.mkdir(parents=True)
    (segment / "audio.jsonl").write_text('{}\n{"text": "hello"}\n')
    (agents_dir / "entities.md").write_text("Entity extraction results")

    no_agents = {}
    wanted = {"transcripts": True, "percepts": False, "agents": no_agents}
    result, counts = mod.cluster("20240101", sources=wanted)

    assert counts["transcripts"] == 1
    # The available agent output is neither counted nor rendered.
    assert counts["agents"] == 0
    assert "Entity extraction results" not in result
543
544
def test_filename_to_agent_key():
    """_filename_to_agent_key maps output file stems to agent keys."""
    from think.cluster import _filename_to_agent_key

    expected_keys = {
        # System agents keep their stem unchanged.
        "entities": "entities",
        "flow": "flow",
        # App-namespaced stems "_app_agent" become "app:agent".
        "_todos_review": "todos:review",
        "_entities_observer": "entities:observer",
        # Leading underscore without a second component is returned as-is.
        "_app": "_app",
    }
    for stem, key in expected_keys.items():
        assert _filename_to_agent_key(stem) == key
559
560
def test_agent_matches_filter():
    """_agent_matches_filter decides whether an output stem passes a filter."""
    from think.cluster import _agent_matches_filter

    stems = ("entities", "_todos_review")

    # A None filter admits every agent.
    for stem in stems:
        assert _agent_matches_filter(stem, None) is True

    # An empty dict admits none.
    for stem in stems:
        assert _agent_matches_filter(stem, {}) is False

    # Explicit filter: truthy values select, False rejects, and a stem with
    # no entry in the filter is rejected as well.
    filter_dict = {"entities": True, "meetings": False, "todos:review": "required"}
    outcomes = [
        ("entities", True),
        ("meetings", False),
        ("_todos_review", True),
        ("flow", False),
    ]
    for stem, allowed in outcomes:
        assert _agent_matches_filter(stem, filter_dict) is allowed
579
580
def test_scan_day_combined(tmp_path, monkeypatch):
    """scan_day() returns the ranges of cluster_scan() and the segments of cluster_segments()."""
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))
    stream_dir = day_path("20240101") / "default"

    mod = importlib.import_module("think.cluster")

    # 09:00 segment with audio and screen.
    both = stream_dir / "090000_300"
    both.mkdir(parents=True)
    (both / "audio.jsonl").write_text("{}\n")
    (both / "screen.jsonl").write_text('{"raw": "screen.webm"}\n')

    # 09:30 segment with audio only.
    audio_only = stream_dir / "093000_300"
    audio_only.mkdir(parents=True)
    (audio_only / "audio.jsonl").write_text("{}\n")

    audio_ranges, screen_ranges, segments = mod.scan_day("20240101")
    expected_ranges = mod.cluster_scan("20240101")
    expected_segments = mod.cluster_segments("20240101")

    assert audio_ranges == [("09:00", "09:15"), ("09:30", "09:45")]
    assert screen_ranges == [("09:00", "09:15")]

    segment_at_0900 = {
        "key": "090000_300",
        "start": "09:00",
        "end": "09:05",
        "types": ["audio", "screen"],
        "stream": "default",
    }
    segment_at_0930 = {
        "key": "093000_300",
        "start": "09:30",
        "end": "09:35",
        "types": ["audio"],
        "stream": "default",
    }
    assert segments == [segment_at_0900, segment_at_0930]

    # The combined scan must agree with the two single-purpose functions.
    assert (audio_ranges, screen_ranges) == expected_ranges
    assert segments == expected_segments
620
621
def test_scan_day_empty(tmp_path, monkeypatch):
    """scan_day() returns three empty lists for a day with no data."""
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))

    mod = importlib.import_module("think.cluster")

    outcome = mod.scan_day("20250101")
    nothing = ([], [], [])
    assert outcome == nothing
628
629
def test_day_path_create_false(tmp_path, monkeypatch):
    """day_path(create=False) leaves the directory absent; the default call creates it."""
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))

    # With create=False the path is returned but nothing is made on disk.
    assert not day_path("29990101", create=False).exists()

    # The default call materialises the directory.
    assert day_path("29990101").exists()