# SPDX-License-Identifier: AGPL-3.0-only
# Copyright (c) 2026 sol pbc

import importlib
import json

import pytest


def test_number_lines_and_segments():
    """Walk a four-line transcript through numbering, parsing and slicing.

    Checks that ``number_lines`` returns the prefixed text together with the
    raw lines, that ``parse_segment_boundaries`` hands back well-formed
    boundaries unchanged, and that ``segments_from_boundaries`` pairs each
    ``start_at`` with the text from its line up to the next boundary.
    """
    detect = importlib.import_module("think.detect_transcript")

    numbered_text, raw_lines = detect.number_lines("a\nb\nc\nd")
    assert numbered_text == "1: a\n2: b\n3: c\n4: d"
    assert raw_lines == ["a", "b", "c", "d"]

    # The same structure serves as the JSON payload and the expected result:
    # valid boundaries are expected to come back exactly as supplied.
    expected_boundaries = [
        {"start_at": "12:00:00", "line": 2},
        {"start_at": "12:05:00", "line": 4},
    ]
    parsed = detect.parse_segment_boundaries(
        json.dumps(expected_boundaries), len(raw_lines)
    )
    assert parsed == expected_boundaries

    # Line 1 ("a") precedes the first boundary, so it is absent from the
    # expected segments.
    assert detect.segments_from_boundaries(raw_lines, parsed) == [
        ("12:00:00", "b\nc"),
        ("12:05:00", "d"),
    ]


def test_parse_segment_boundaries_invalid():
    """Each malformed boundary payload must raise ``ValueError``.

    Every payload is parsed against a three-line transcript. The cases run in
    order and the first one that fails to raise aborts the test.
    """
    detect = importlib.import_module("think.detect_transcript")

    line_count = 3
    rejected_payloads = (
        # Invalid JSON
        "not json",
        # Empty list
        "[]",
        # Not an object
        "[1]",
        # Missing fields
        '[{"line": 1}]',
        '[{"start_at": "12:00:00"}]',
        # Invalid line number (0)
        '[{"start_at": "12:00:00", "line": 0}]',
        # Line number exceeds max
        '[{"start_at": "12:00:00", "line": 5}]',
        # Non-increasing line numbers
        '[{"start_at": "12:00:00", "line": 2}, {"start_at": "12:05:00", "line": 1}]',
    )

    for payload in rejected_payloads:
        with pytest.raises(ValueError):
            detect.parse_segment_boundaries(payload, line_count)


def test_detect_transcript_segment(monkeypatch):
    """Run ``detect_transcript_segment`` end to end against a stubbed model.

    ``think.models.generate`` is replaced with a stub that ignores its keyword
    arguments and returns a fixed two-boundary JSON reply. The result must be
    a list of ``(start_at, text)`` tuples split at those boundaries.
    """
    detect = importlib.import_module("think.detect_transcript")

    # Canned model reply in the start_at/line boundary format.
    canned_reply = (
        '[{"start_at": "14:30:00", "line": 1}, {"start_at": "14:35:00", "line": 3}]'
    )
    monkeypatch.setattr("think.models.generate", lambda **kwargs: canned_reply)

    # The second positional argument is the transcript's start time.
    segments = detect.detect_transcript_segment("a\nb\nc\nd", "14:30:00")

    expected_segments = [("14:30:00", "a\nb"), ("14:35:00", "c\nd")]
    assert segments == expected_segments