personal memory agent
1# SPDX-License-Identifier: AGPL-3.0-only
2# Copyright (c) 2026 sol pbc
3
4import importlib
5import json
6
7import pytest
8
9
def test_number_lines_and_segments():
    """Check line numbering, boundary parsing and segment slicing end to end."""
    detect = importlib.import_module("think.detect_transcript")

    # number_lines hands back the numbered text together with the raw lines.
    numbered_text, raw_lines = detect.number_lines("a\nb\nc\nd")
    assert raw_lines == ["a", "b", "c", "d"]
    assert numbered_text == "1: a\n2: b\n3: c\n4: d"

    # Boundaries in the start_at/line format must survive parsing unchanged.
    expected_boundaries = [
        {"start_at": "12:00:00", "line": 2},
        {"start_at": "12:05:00", "line": 4},
    ]
    parsed = detect.parse_segment_boundaries(
        json.dumps(expected_boundaries), len(raw_lines)
    )
    assert parsed == expected_boundaries

    # Each segment runs from its boundary line up to the next boundary, so
    # line 1 ("a") precedes the first boundary and appears in no segment.
    expected_segments = [("12:00:00", "b\nc"), ("12:05:00", "d")]
    assert detect.segments_from_boundaries(raw_lines, parsed) == expected_segments
32
33
def test_parse_segment_boundaries_invalid():
    """Every malformed boundary payload must be rejected with ValueError."""
    detect = importlib.import_module("think.detect_transcript")
    line_count = 3

    # Checked in order; the first payload that is *not* rejected fails the test.
    rejected_payloads = (
        # Invalid JSON
        "not json",
        # Empty list
        "[]",
        # Entry is not an object
        "[1]",
        # Missing fields
        '[{"line": 1}]',
        '[{"start_at": "12:00:00"}]',
        # Invalid line number (0)
        '[{"start_at": "12:00:00", "line": 0}]',
        # Line number exceeds the line count
        '[{"start_at": "12:00:00", "line": 5}]',
        # Non-increasing line numbers
        '[{"start_at": "12:00:00", "line": 2}, {"start_at": "12:05:00", "line": 1}]',
    )

    for payload in rejected_payloads:
        with pytest.raises(ValueError):
            detect.parse_segment_boundaries(payload, line_count)
69
70
def test_detect_transcript_segment(monkeypatch):
    """Run detect_transcript_segment against a stubbed think.models.generate."""
    detect = importlib.import_module("think.detect_transcript")

    # Canned model reply in the start_at/line boundary format.
    canned_response = (
        '[{"start_at": "14:30:00", "line": 1}, {"start_at": "14:35:00", "line": 3}]'
    )

    def fake_generate(**kwargs):
        # Accept any keyword arguments and always return the canned reply.
        return canned_response

    monkeypatch.setattr("think.models.generate", fake_generate)

    # The start time is passed as the second positional argument.
    segments = detect.detect_transcript_segment("a\nb\nc\nd", "14:30:00")

    # The result is a list of (start_at, text) tuples split at lines 1 and 3.
    expected = [("14:30:00", "a\nb"), ("14:35:00", "c\nd")]
    assert segments == expected