personal memory agent
at main 86 lines 2.7 kB view raw
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright (c) 2026 sol pbc

import importlib
import json

import pytest


def test_number_lines_and_segments():
    """Check the line-numbering, boundary-parsing and segment-slicing helpers.

    Runs the three helpers in sequence on a four-line transcript: the
    numbered text and raw lines from ``number_lines``, the validated
    boundaries from ``parse_segment_boundaries``, and the
    ``(start_at, text)`` tuples from ``segments_from_boundaries``.
    """
    mod = importlib.import_module("think.detect_transcript")
    numbered, lines = mod.number_lines("a\nb\nc\nd")
    assert numbered == "1: a\n2: b\n3: c\n4: d"
    assert lines == ["a", "b", "c", "d"]

    # Test parse_segment_boundaries with new format
    boundaries_json = json.dumps(
        [
            {"start_at": "12:00:00", "line": 2},
            {"start_at": "12:05:00", "line": 4},
        ]
    )
    boundaries = mod.parse_segment_boundaries(boundaries_json, len(lines))
    assert boundaries == [
        {"start_at": "12:00:00", "line": 2},
        {"start_at": "12:05:00", "line": 4},
    ]

    # Test segments_from_boundaries with new format
    # The first boundary is at line 2, so line 1 ("a") is in no segment.
    segments = mod.segments_from_boundaries(lines, boundaries)
    assert segments == [("12:00:00", "b\nc"), ("12:05:00", "d")]


def test_parse_segment_boundaries_invalid():
    """Check that ``parse_segment_boundaries`` raises ``ValueError`` on bad input.

    Each case passes a malformed payload together with a line count of 3.
    """
    mod = importlib.import_module("think.detect_transcript")

    # Invalid JSON
    with pytest.raises(ValueError):
        mod.parse_segment_boundaries("not json", 3)

    # Empty list
    with pytest.raises(ValueError):
        mod.parse_segment_boundaries("[]", 3)

    # Not an object
    with pytest.raises(ValueError):
        mod.parse_segment_boundaries("[1]", 3)

    # Missing fields
    with pytest.raises(ValueError):
        mod.parse_segment_boundaries('[{"line": 1}]', 3)
    with pytest.raises(ValueError):
        mod.parse_segment_boundaries('[{"start_at": "12:00:00"}]', 3)

    # Invalid line number (0)
    with pytest.raises(ValueError):
        mod.parse_segment_boundaries('[{"start_at": "12:00:00", "line": 0}]', 3)

    # Line number exceeds max
    with pytest.raises(ValueError):
        mod.parse_segment_boundaries('[{"start_at": "12:00:00", "line": 5}]', 3)

    # Non-increasing line numbers
    with pytest.raises(ValueError):
        mod.parse_segment_boundaries(
            '[{"start_at": "12:00:00", "line": 2}, {"start_at": "12:05:00", "line": 1}]',
            3,
        )


def test_detect_transcript_segment(monkeypatch):
    """Check ``detect_transcript_segment`` against a stubbed model call.

    ``think.models.generate`` is replaced with a stub returning a fixed
    boundary list, so the test only covers how the function turns that
    reply into ``(start_at, text)`` tuples.
    """
    mod = importlib.import_module("think.detect_transcript")

    # Mock returns new format with start_at and line
    def mock_generate(**kwargs):
        return (
            '[{"start_at": "14:30:00", "line": 1}, {"start_at": "14:35:00", "line": 3}]'
        )

    monkeypatch.setattr("think.models.generate", mock_generate)

    # Now requires start_time argument
    result = mod.detect_transcript_segment("a\nb\nc\nd", "14:30:00")

    # Returns list of (start_at, text) tuples
    assert result == [("14:30:00", "a\nb"), ("14:35:00", "c\nd")]