"""OR1 Assembler package.

Public API for assembling dfasm source to emulator-ready configuration:
- assemble(): Parse → Lower → Expand → Resolve → Place → Allocate → Codegen (direct mode)
- assemble_to_tokens(): ... → Codegen (token stream mode)
- run_pipeline(): the shared Parse → ... → Allocate stages, without codegen
- serialize_graph(): Serialize an IRGraph to dfasm at any pipeline stage
- round_trip(): Parse → Lower → Serialize (convenience for round-trip testing)
"""

from lark import Lark
from pathlib import Path
import dataclasses

from asm.lower import lower
from asm.expand import expand
from asm.resolve import resolve
from asm.place import place
from asm.allocate import allocate
from asm.codegen import generate_direct, generate_tokens, AssemblyResult
from asm.errors import ErrorSeverity, format_error
from asm.serialize import serialize as _serialize_graph
from asm.ir import IRGraph
from asm.builtins import BUILTIN_MACROS, _BUILTIN_LINE_COUNT

# Location of the dfasm grammar file: two directory levels up from this
# module's own file, i.e. next to (not inside) the package directory.
_GRAMMAR_PATH = Path(__file__).parent.parent / "dfasm.lark"
# Module-level parser cache. Stays None until _get_parser() builds the parser
# on first use; every later call reuses the same instance.
_parser = None


def _has_errors(graph: IRGraph) -> bool:
    """Report whether ``graph.errors`` holds at least one ERROR-severity entry.

    Entries of any other severity do not count.
    """
    for diagnostic in graph.errors:
        if diagnostic.severity == ErrorSeverity.ERROR:
            return True
    return False


def _format_pipeline_errors(graph: IRGraph, full_source: str, stage: str) -> str:
    """Build one multi-line message from the ERROR-severity entries of a graph.

    Args:
        graph: Graph whose ``errors`` and ``builtin_line_offset`` are read.
        full_source: Source text passed to ``format_error`` for every entry.
        stage: Label used in the header line (for example ``"Placement"``).

    Returns:
        A header ``"<stage> errors:"`` followed by the formatted errors, one
        per ``format_error`` result, separated by newlines.
    """
    line_shift = graph.builtin_line_offset
    rendered = []
    for entry in graph.errors:
        # Only hard errors are reported; other severities are skipped.
        if entry.severity == ErrorSeverity.ERROR:
            rendered.append(
                format_error(entry, full_source, builtin_line_offset=line_shift)
            )
    body = "\n".join(rendered)
    return f"{stage} errors:\n{body}"


def _get_parser():
    """Return the shared Lark parser, constructing it on first use.

    The grammar text is read from ``_GRAMMAR_PATH`` and the resulting parser
    is stored in the module-level ``_parser`` so later calls reuse it.

    Returns:
        The cached ``Lark`` instance, configured with the Earley parser and
        ``propagate_positions=True``.
    """
    global _parser
    if _parser is None:
        # Decode explicitly as UTF-8 instead of relying on the platform's
        # locale encoding, so the grammar loads the same way everywhere.
        grammar_text = _GRAMMAR_PATH.read_text(encoding="utf-8")
        _parser = Lark(
            grammar_text,
            parser="earley",
            propagate_positions=True,
        )
    return _parser


def run_pipeline(source: str) -> IRGraph:
    """Drive dfasm source through every stage that precedes code generation.

    Stages, in order: parse → lower → expand → resolve → place → allocate.
    Both assemble() and assemble_to_tokens() start from the graph returned here.

    The text of BUILTIN_MACROS plus a newline is placed in front of the user
    source before parsing, and _BUILTIN_LINE_COUNT is stored on the graph as
    ``builtin_line_offset`` so error formatting can account for the prepended
    text.

    Args:
        source: dfasm source code as a string

    Returns:
        The IRGraph produced by the allocate stage

    Raises:
        ValueError: If the graph holds error-severity entries after resolve,
            place, or allocate. Exceptions raised by the parser or by a pass
            itself are not caught here and propagate unchanged.
    """
    # Built-in macros go first so user code can reference them.
    combined = "\n".join((BUILTIN_MACROS, source))
    graph = lower(_get_parser().parse(combined))
    # Remember how many lines were prepended, for error line adjustment.
    graph = dataclasses.replace(graph, builtin_line_offset=_BUILTIN_LINE_COUNT)
    graph = expand(graph)

    # Each of these stages is followed by an error check; the label names the
    # stage in the raised message. The first check happens only after resolve.
    checked_stages = (
        (resolve, "Assembly"),
        (place, "Placement"),
        (allocate, "Allocation"),
    )
    for run_stage, label in checked_stages:
        graph = run_stage(graph)
        if _has_errors(graph):
            raise ValueError(_format_pipeline_errors(graph, combined, label))
    return graph


def assemble(source: str) -> AssemblyResult:
    """Assemble dfasm source into a direct-mode emulator configuration.

    Runs the shared pipeline (parse → lower → expand → resolve → place →
    allocate) via run_pipeline() and passes the resulting graph to direct-mode
    code generation.

    Args:
        source: dfasm source code as a string

    Returns:
        AssemblyResult containing pe_configs, sm_configs, and seed_tokens

    Raises:
        ValueError: If run_pipeline() finds errors after one of its checked
            stages
    """
    return generate_direct(run_pipeline(source))


def assemble_to_tokens(source: str) -> list:
    """Assemble dfasm source into a hardware-faithful bootstrap token stream.

    Runs the shared pipeline (parse → lower → expand → resolve → place →
    allocate) via run_pipeline() and passes the resulting graph to token-mode
    code generation.
    The returned sequence is ordered: SM init tokens → IRAM write tokens →
    seed tokens, and is consumable by emulator System.inject() and
    System.load().

    Args:
        source: dfasm source code as a string

    Returns:
        List of tokens (SMToken, PELocalWriteToken, MonadToken) in bootstrap
        order

    Raises:
        ValueError: If run_pipeline() finds errors after one of its checked
            stages
    """
    return generate_tokens(run_pipeline(source))


def serialize_graph(graph: IRGraph) -> str:
    """Turn an IRGraph back into dfasm source text.

    Public wrapper around the serializer from ``asm.serialize``. It can be
    applied to a graph taken from any pipeline stage (after lowering,
    resolution, placement, or allocation), which makes it handy for inspecting
    what each pass did:
        graph = lower(tree)            # tree: parse result for the source
        print(serialize_graph(graph))  # inspect after lowering

    Args:
        graph: The IRGraph to serialize

    Returns:
        dfasm source text for the graph
    """
    dfasm_text = _serialize_graph(graph)
    return dfasm_text


def round_trip(source: str) -> str:
    """Parse, lower, and serialize dfasm source (round-trip testing helper).

    Flow: source → parse → lower → serialize → source text.
    Resolution, placement, and allocation are not run, and the source is
    parsed exactly as given (built-in macros are not prepended here).

    Args:
        source: dfasm source code as a string

    Returns:
        dfasm source text serialized from the lowered graph

    Raises:
        Whatever the parser or the lowering pass raises; nothing is caught or
        converted in this function.
        NOTE(review): this was previously documented as ValueError on parse
        failure — confirm which exception type the parser actually raises.
    """
    return _serialize_graph(lower(_get_parser().parse(source)))


# Names exported by `from <package> import *`: the entry points defined in
# this module plus AssemblyResult, which is re-exported from asm.codegen.
__all__ = [
    "assemble",
    "assemble_to_tokens",
    "round_trip",
    "run_pipeline",
    "serialize_graph",
    "AssemblyResult",
]