tests/test_detect_transcript.py at main

solpbc.org / solstone
fork atom
personal memory agent
fork atom
solstone / tests / test_detect_transcript.py
at main 86 lines 2.7 kB view raw
wrap content
Jer Miller Consolidate muse/ package into think/ 2mo ago
8591465e
 1# SPDX-License-Identifier: AGPL-3.0-only
 2# Copyright (c) 2026 sol pbc
 3
 4import importlib
 5import json
 6
 7import pytest
 8
 9
def test_number_lines_and_segments():
    """Walk the happy path: number lines, parse boundaries, slice segments."""
    detect = importlib.import_module("think.detect_transcript")

    numbered_text, raw_lines = detect.number_lines("a\nb\nc\nd")
    assert numbered_text == "1: a\n2: b\n3: c\n4: d"
    assert raw_lines == ["a", "b", "c", "d"]

    # Boundaries in the start_at/line format; the parser is expected to hand
    # back exactly what was serialized.
    expected_boundaries = [
        {"start_at": "12:00:00", "line": 2},
        {"start_at": "12:05:00", "line": 4},
    ]
    parsed = detect.parse_segment_boundaries(
        json.dumps(expected_boundaries), len(raw_lines)
    )
    assert parsed == expected_boundaries

    # Each segment pairs its start_at with the text from its boundary line up
    # to the line before the next boundary; line 1 ("a") precedes the first
    # boundary and does not appear in any segment.
    sliced = detect.segments_from_boundaries(raw_lines, parsed)
    assert sliced == [("12:00:00", "b\nc"), ("12:05:00", "d")]
32
33
def test_parse_segment_boundaries_invalid():
    """Verify parse_segment_boundaries raises ValueError for malformed input."""
    detect = importlib.import_module("think.detect_transcript")
    line_count = 3

    # Every payload below must be rejected when parsed against line_count lines.
    rejected_payloads = (
        # Invalid JSON
        "not json",
        # Empty list
        "[]",
        # Entry is not an object
        "[1]",
        # Missing "start_at" field
        '[{"line": 1}]',
        # Missing "line" field
        '[{"start_at": "12:00:00"}]',
        # Invalid line number (0)
        '[{"start_at": "12:00:00", "line": 0}]',
        # Line number exceeds max
        '[{"start_at": "12:00:00", "line": 5}]',
        # Non-increasing line numbers
        '[{"start_at": "12:00:00", "line": 2}, {"start_at": "12:05:00", "line": 1}]',
    )

    for payload in rejected_payloads:
        with pytest.raises(ValueError):
            detect.parse_segment_boundaries(payload, line_count)
69
70
def test_detect_transcript_segment(monkeypatch):
    """Run detect_transcript_segment with think.models.generate stubbed out."""
    detect = importlib.import_module("think.detect_transcript")

    # Canned model reply in the start_at/line boundary format.
    canned_reply = (
        '[{"start_at": "14:30:00", "line": 1},'
        ' {"start_at": "14:35:00", "line": 3}]'
    )

    def fake_generate(**kwargs):
        # Ignore whatever keyword arguments are passed; always return the
        # same boundaries.
        return canned_reply

    monkeypatch.setattr("think.models.generate", fake_generate)

    # The second positional argument is the start time ("14:30:00" here).
    segments = detect.detect_transcript_segment("a\nb\nc\nd", "14:30:00")

    # Result is a list of (start_at, text) tuples split at lines 1 and 3.
    assert segments == [("14:30:00", "a\nb"), ("14:35:00", "c\nd")]