asm/__init__.py at main · nonbinary.computer/or1-design

nonbinary.computer / or1-design
fork atom
OR-1 dataflow CPU sketch
fork atom
or1-design / asm / __init__.py
at main 177 lines 5.6 kB view raw
wrap content
Orual feat: rewrite ProcessingElement with frame-based matching, output routing, and unified instruction set 10d ago
65613978
"""OR1 Assembler package.

Public API for assembling dfasm source to emulator-ready configuration:
- assemble(): Parse → Lower → Expand → Resolve → Place → Allocate → Codegen (direct mode)
- assemble_to_tokens(): ... → Codegen (token stream mode)
- run_pipeline(): Parse → Lower → Expand → Resolve → Place → Allocate (shared front half, no codegen)
- serialize_graph(): Serialize an IRGraph to dfasm at any pipeline stage
- round_trip(): Parse → Lower → Serialize (convenience for round-trip testing)
"""
  9
 10from lark import Lark
 11from pathlib import Path
 12import dataclasses
 13
 14from asm.lower import lower
 15from asm.expand import expand
 16from asm.resolve import resolve
 17from asm.place import place
 18from asm.allocate import allocate
 19from asm.codegen import generate_direct, generate_tokens, AssemblyResult
 20from asm.errors import ErrorSeverity, format_error
 21from asm.serialize import serialize as _serialize_graph
 22from asm.ir import IRGraph
 23from asm.builtins import BUILTIN_MACROS, _BUILTIN_LINE_COUNT
 24
 25_GRAMMAR_PATH = Path(__file__).parent.parent / "dfasm.lark"
 26_parser = None
 27
 28
 29def _has_errors(graph: IRGraph) -> bool:
 30    """Check if graph has any errors (not warnings)."""
 31    return any(e.severity == ErrorSeverity.ERROR for e in graph.errors)
 32
 33
 34def _format_pipeline_errors(graph: IRGraph, full_source: str, stage: str) -> str:
 35    """Format pipeline errors with builtin line offset adjustment."""
 36    offset = graph.builtin_line_offset
 37    formatted = [
 38        format_error(e, full_source, builtin_line_offset=offset)
 39        for e in graph.errors
 40        if e.severity == ErrorSeverity.ERROR
 41    ]
 42    return f"{stage} errors:\n" + "\n".join(formatted)
 43
 44
 45def _get_parser():
 46    """Lazily initialize and cache the Lark parser."""
 47    global _parser
 48    if _parser is None:
 49        _parser = Lark(
 50            _GRAMMAR_PATH.read_text(),
 51            parser="earley",
 52            propagate_positions=True,
 53        )
 54    return _parser
 55
 56
 57def run_pipeline(source: str) -> IRGraph:
 58    """Run the shared assembly pipeline: parse → lower → expand → resolve → place → allocate.
 59
 60    This is the common pipeline used by both assemble() and assemble_to_tokens().
 61    Error checking happens after each stage.
 62
 63    Built-in macros are prepended to user source before parsing, making them available
 64    in all programs without explicit import.
 65
 66    Args:
 67        source: dfasm source code as a string
 68
 69    Returns:
 70        The fully processed IRGraph ready for code generation
 71
 72    Raises:
 73        ValueError: If any pipeline stage reports errors
 74    """
 75    # Prepend built-in macros to user source
 76    full_source = BUILTIN_MACROS + "\n" + source
 77    tree = _get_parser().parse(full_source)
 78    graph = lower(tree)
 79    # Record the line offset for error reporting adjustment
 80    graph = dataclasses.replace(graph, builtin_line_offset=_BUILTIN_LINE_COUNT)
 81    graph = expand(graph)
 82    graph = resolve(graph)
 83    if _has_errors(graph):
 84        raise ValueError(_format_pipeline_errors(graph, full_source, "Assembly"))
 85    graph = place(graph)
 86    if _has_errors(graph):
 87        raise ValueError(_format_pipeline_errors(graph, full_source, "Placement"))
 88    graph = allocate(graph)
 89    if _has_errors(graph):
 90        raise ValueError(_format_pipeline_errors(graph, full_source, "Allocation"))
 91    return graph
 92
 93
 94def assemble(source: str) -> AssemblyResult:
 95    """Assemble dfasm source to direct-mode emulator config.
 96
 97    Chains the full pipeline: parse → lower → resolve → place → allocate → codegen.
 98    Returns PEConfig/SMConfig lists and seed tokens for direct system setup.
 99
100    Args:
101        source: dfasm source code as a string
102
103    Returns:
104        AssemblyResult containing pe_configs, sm_configs, and seed_tokens
105
106    Raises:
107        ValueError: If any pipeline stage reports errors
108    """
109    graph = run_pipeline(source)
110    return generate_direct(graph)
111
112
def assemble_to_tokens(source: str) -> list:
    """Assemble dfasm source into a bootstrap token stream.

    Runs run_pipeline() (parse → lower → expand → resolve → place → allocate)
    and hands the resulting graph to ``generate_tokens``.

    Args:
        source: dfasm source code as a string

    Returns:
        The token list produced by ``generate_tokens``. It is documented
        upstream as ordered SM init tokens → IRAM write tokens → seed tokens,
        suitable for the emulator's System.inject() and System.load().

    Raises:
        ValueError: If any checked pipeline stage reports errors
    """
    return generate_tokens(run_pipeline(source))
131
132
def serialize_graph(graph: IRGraph) -> str:
    """Render an IRGraph as dfasm source text.

    Thin public wrapper around ``asm.serialize.serialize``. Intended for
    inspecting the IR between passes, for example printing the graph right
    after lowering:
        print(serialize_graph(lower(tree)))

    Args:
        graph: The IRGraph to serialize

    Returns:
        dfasm source text for the graph
    """
    dfasm_text = _serialize_graph(graph)
    return dfasm_text
148
149
def round_trip(source: str) -> str:
    """Parse and lower dfasm source, then serialize it straight back to text.

    Convenience for round-trip testing: source → parse → lower → serialize.
    Expansion, resolution, placement, and allocation are not run, and the
    built-in macro text is not prepended here.

    Args:
        source: dfasm source code as a string

    Returns:
        dfasm source text serialized from the lowered graph

    Raises:
        Whatever the parser or lowering pass raises on bad input.
        NOTE(review): earlier documentation named ValueError here, but nothing
        in this function raises it directly. Confirm the parser's exception type.
    """
    lowered = lower(_get_parser().parse(source))
    return _serialize_graph(lowered)
168
169
# Public names of the package: entry-point functions first, then the result type.
__all__ = [
    "assemble", "assemble_to_tokens", "round_trip", "run_pipeline", "serialize_graph",
    "AssemblyResult",
]