OR-1 dataflow CPU sketch
at main 177 lines 5.6 kB view raw
"""OR1 Assembler package.

Public API for assembling dfasm source to emulator-ready configuration:
- run_pipeline(): Parse → Lower → Expand → Resolve → Place → Allocate
- assemble(): run_pipeline() → Codegen (direct mode)
- assemble_to_tokens(): run_pipeline() → Codegen (token stream mode)
- serialize_graph(): Serialize an IRGraph to dfasm at any pipeline stage
- round_trip(): Parse → Lower → Serialize (convenience for round-trip testing)
"""

import dataclasses
from pathlib import Path

from lark import Lark

from asm.lower import lower
from asm.expand import expand
from asm.resolve import resolve
from asm.place import place
from asm.allocate import allocate
from asm.codegen import generate_direct, generate_tokens, AssemblyResult
from asm.errors import ErrorSeverity, format_error
from asm.serialize import serialize as _serialize_graph
from asm.ir import IRGraph
from asm.builtins import BUILTIN_MACROS, _BUILTIN_LINE_COUNT

# The grammar file lives one directory above this package.
_GRAMMAR_PATH = Path(__file__).parent.parent / "dfasm.lark"

# Module-level parser cache; populated on first use by _get_parser().
_parser = None


def _has_errors(graph: IRGraph) -> bool:
    """Return True if the graph carries at least one ERROR-severity entry.

    Entries of any other severity (e.g. warnings) are ignored.
    """
    return any(e.severity == ErrorSeverity.ERROR for e in graph.errors)


def _format_pipeline_errors(graph: IRGraph, full_source: str, stage: str) -> str:
    """Build the message used when a pipeline stage fails.

    Args:
        graph: Graph whose ``errors`` list is reported; only entries with
            ERROR severity are included.
        full_source: The text that was actually parsed (built-in macros
            followed by the user source), passed through to format_error().
        stage: Human-readable stage name used as the message heading.

    Returns:
        ``"<stage> errors:"`` followed by one formatted error per line.
    """
    # format_error() receives the builtin offset recorded on the graph so it
    # can adjust for the prepended built-in macro lines.
    offset = graph.builtin_line_offset
    formatted = [
        format_error(e, full_source, builtin_line_offset=offset)
        for e in graph.errors
        if e.severity == ErrorSeverity.ERROR
    ]
    return f"{stage} errors:\n" + "\n".join(formatted)


def _get_parser() -> Lark:
    """Return the shared Lark parser, constructing it on first call.

    The grammar is read from ``_GRAMMAR_PATH`` and the resulting parser is
    cached in the module-level ``_parser`` so later calls reuse it.
    """
    global _parser
    if _parser is None:
        _parser = Lark(
            # Decode explicitly as UTF-8 so grammar loading does not depend
            # on the platform's default locale encoding.
            _GRAMMAR_PATH.read_text(encoding="utf-8"),
            parser="earley",
            propagate_positions=True,
        )
    return _parser


def run_pipeline(source: str) -> IRGraph:
    """Run the shared assembly pipeline: parse → lower → expand → resolve → place → allocate.

    This is the common pipeline used by both assemble() and assemble_to_tokens().
    Errors are checked after the resolve, place and allocate stages (there is
    no separate check after lower or expand).

    Built-in macros are prepended to user source before parsing, making them
    available in all programs without explicit import.

    Args:
        source: dfasm source code as a string

    Returns:
        The fully processed IRGraph ready for code generation

    Raises:
        ValueError: If the graph holds ERROR-severity entries after resolve,
            place or allocate.

    Note:
        Exceptions raised by the Lark parser are not caught or translated
        here; they propagate to the caller unchanged.
    """
    # Prepend built-in macros to user source
    full_source = BUILTIN_MACROS + "\n" + source
    tree = _get_parser().parse(full_source)
    graph = lower(tree)
    # Record the line offset for error reporting adjustment
    graph = dataclasses.replace(graph, builtin_line_offset=_BUILTIN_LINE_COUNT)
    graph = expand(graph)
    graph = resolve(graph)
    if _has_errors(graph):
        raise ValueError(_format_pipeline_errors(graph, full_source, "Assembly"))
    graph = place(graph)
    if _has_errors(graph):
        raise ValueError(_format_pipeline_errors(graph, full_source, "Placement"))
    graph = allocate(graph)
    if _has_errors(graph):
        raise ValueError(_format_pipeline_errors(graph, full_source, "Allocation"))
    return graph


def assemble(source: str) -> AssemblyResult:
    """Assemble dfasm source to direct-mode emulator config.

    Chains the full pipeline:
    parse → lower → expand → resolve → place → allocate → codegen.
    Returns PEConfig/SMConfig lists and seed tokens for direct system setup.

    Args:
        source: dfasm source code as a string

    Returns:
        AssemblyResult containing pe_configs, sm_configs, and seed_tokens

    Raises:
        ValueError: If run_pipeline() reports errors. Parser exceptions
            propagate unchanged (see run_pipeline()).
    """
    graph = run_pipeline(source)
    return generate_direct(graph)


def assemble_to_tokens(source: str) -> list:
    """Assemble dfasm source to hardware-faithful bootstrap token stream.

    Chains the full pipeline:
    parse → lower → expand → resolve → place → allocate → codegen (token mode).
    Returns an ordered sequence: SM init tokens → IRAM write tokens → seed tokens.
    This sequence is consumable by emulator System.inject() and System.load().

    Args:
        source: dfasm source code as a string

    Returns:
        List of tokens (SMToken, PELocalWriteToken, MonadToken) in bootstrap order

    Raises:
        ValueError: If run_pipeline() reports errors. Parser exceptions
            propagate unchanged (see run_pipeline()).
    """
    graph = run_pipeline(source)
    return generate_tokens(graph)


def serialize_graph(graph: IRGraph) -> str:
    """Serialize an IRGraph to dfasm source text.

    Thin public wrapper around asm.serialize.serialize. Intended for use at
    any pipeline stage (after lowering, resolution, placement, or allocation),
    e.g. to inspect IR transformations after a pass:

        graph = lower(tree)            # tree: a parsed dfasm program
        print(serialize_graph(graph))  # inspect after lowering

    Args:
        graph: The IRGraph to serialize

    Returns:
        dfasm source text for the graph
    """
    return _serialize_graph(graph)


def round_trip(source: str) -> str:
    """Parse, lower, and serialize back to dfasm (convenience for round-trip testing).

    Quick round-trip for testing: source → parse → lower → serialize → source.
    Does not run expansion, resolution, placement, or allocation, and does
    not prepend the built-in macros: ``source`` is parsed exactly as given.

    Args:
        source: dfasm source code as a string

    Returns:
        dfasm source text serialized from the lowered graph

    Raises:
        Nothing is caught or translated here: any exception raised by the
        Lark parser (or by lower()/serialize()) propagates unchanged.
        Errors recorded on the graph are not inspected.
    """
    tree = _get_parser().parse(source)
    graph = lower(tree)
    return _serialize_graph(graph)


__all__ = [
    "assemble",
    "assemble_to_tokens",
    "round_trip",
    "run_pipeline",
    "serialize_graph",
    "AssemblyResult",
]