"""OR1 Assembler package. Public API for assembling dfasm source to emulator-ready configuration: - assemble(): Parse → Lower → Resolve → Place → Allocate → Codegen (direct mode) - assemble_to_tokens(): ... → Codegen (token stream mode) - serialize_graph(): Serialize an IRGraph to dfasm at any pipeline stage - round_trip(): Parse → Lower → Serialize (convenience for round-trip testing) """ from lark import Lark from pathlib import Path import dataclasses from asm.lower import lower from asm.expand import expand from asm.resolve import resolve from asm.place import place from asm.allocate import allocate from asm.codegen import generate_direct, generate_tokens, AssemblyResult from asm.errors import ErrorSeverity, format_error from asm.serialize import serialize as _serialize_graph from asm.ir import IRGraph from asm.builtins import BUILTIN_MACROS, _BUILTIN_LINE_COUNT _GRAMMAR_PATH = Path(__file__).parent.parent / "dfasm.lark" _parser = None def _has_errors(graph: IRGraph) -> bool: """Check if graph has any errors (not warnings).""" return any(e.severity == ErrorSeverity.ERROR for e in graph.errors) def _format_pipeline_errors(graph: IRGraph, full_source: str, stage: str) -> str: """Format pipeline errors with builtin line offset adjustment.""" offset = graph.builtin_line_offset formatted = [ format_error(e, full_source, builtin_line_offset=offset) for e in graph.errors if e.severity == ErrorSeverity.ERROR ] return f"{stage} errors:\n" + "\n".join(formatted) def _get_parser(): """Lazily initialize and cache the Lark parser.""" global _parser if _parser is None: _parser = Lark( _GRAMMAR_PATH.read_text(), parser="earley", propagate_positions=True, ) return _parser def run_pipeline(source: str) -> IRGraph: """Run the shared assembly pipeline: parse → lower → expand → resolve → place → allocate. This is the common pipeline used by both assemble() and assemble_to_tokens(). Error checking happens after each stage. Built-in macros are prepended to user source before parsing, making them available in all programs without explicit import. Args: source: dfasm source code as a string Returns: The fully processed IRGraph ready for code generation Raises: ValueError: If any pipeline stage reports errors """ # Prepend built-in macros to user source full_source = BUILTIN_MACROS + "\n" + source tree = _get_parser().parse(full_source) graph = lower(tree) # Record the line offset for error reporting adjustment graph = dataclasses.replace(graph, builtin_line_offset=_BUILTIN_LINE_COUNT) graph = expand(graph) graph = resolve(graph) if _has_errors(graph): raise ValueError(_format_pipeline_errors(graph, full_source, "Assembly")) graph = place(graph) if _has_errors(graph): raise ValueError(_format_pipeline_errors(graph, full_source, "Placement")) graph = allocate(graph) if _has_errors(graph): raise ValueError(_format_pipeline_errors(graph, full_source, "Allocation")) return graph def assemble(source: str) -> AssemblyResult: """Assemble dfasm source to direct-mode emulator config. Chains the full pipeline: parse → lower → resolve → place → allocate → codegen. Returns PEConfig/SMConfig lists and seed tokens for direct system setup. Args: source: dfasm source code as a string Returns: AssemblyResult containing pe_configs, sm_configs, and seed_tokens Raises: ValueError: If any pipeline stage reports errors """ graph = run_pipeline(source) return generate_direct(graph) def assemble_to_tokens(source: str) -> list: """Assemble dfasm source to hardware-faithful bootstrap token stream. Chains the full pipeline: parse → lower → resolve → place → allocate → codegen (token mode). Returns an ordered sequence: SM init tokens → IRAM write tokens → seed tokens. This sequence is consumable by emulator System.inject() and System.load(). Args: source: dfasm source code as a string Returns: List of tokens (SMToken, PELocalWriteToken, MonadToken) in bootstrap order Raises: ValueError: If any pipeline stage reports errors """ graph = run_pipeline(source) return generate_tokens(graph) def serialize_graph(graph: IRGraph) -> str: """Serialize an IRGraph to dfasm source text. Works at any pipeline stage (after lowering, resolution, placement, or allocation). Useful for inspecting IR transformations after each pass: graph = lower(parse(source)) print(serialize_graph(graph)) # inspect after lowering Args: graph: The IRGraph to serialize Returns: Valid dfasm source text """ return _serialize_graph(graph) def round_trip(source: str) -> str: """Parse, lower, and serialize back to dfasm (convenience for round-trip testing). Quick round-trip for testing: source → parse → lower → serialize → source. Does not run resolution, placement, or allocation. Args: source: dfasm source code as a string Returns: Valid dfasm source text Raises: ValueError: If parsing fails """ tree = _get_parser().parse(source) graph = lower(tree) return _serialize_graph(graph) __all__ = [ "assemble", "assemble_to_tokens", "round_trip", "run_pipeline", "serialize_graph", "AssemblyResult", ]