OR-1 dataflow CPU sketch
1"""OR1 Assembler package.
2
Public API for assembling dfasm source to emulator-ready configuration:
- assemble(): Parse → Lower → Expand → Resolve → Place → Allocate → Codegen (direct mode)
- assemble_to_tokens(): same pipeline → Codegen (token stream mode)
- serialize_graph(): Serialize an IRGraph to dfasm at any pipeline stage
- round_trip(): Parse → Lower → Serialize (convenience for round-trip testing)
8"""
9
10from lark import Lark
11from pathlib import Path
12import dataclasses
13
14from asm.lower import lower
15from asm.expand import expand
16from asm.resolve import resolve
17from asm.place import place
18from asm.allocate import allocate
19from asm.codegen import generate_direct, generate_tokens, AssemblyResult
20from asm.errors import ErrorSeverity, format_error
21from asm.serialize import serialize as _serialize_graph
22from asm.ir import IRGraph
23from asm.builtins import BUILTIN_MACROS, _BUILTIN_LINE_COUNT
24
# Location of the dfasm grammar: a file named "dfasm.lark" two directory
# levels above this module file.
_GRAMMAR_PATH = Path(__file__).parent.parent.joinpath("dfasm.lark")

# Cached parser instance; stays None until _get_parser() builds it.
_parser = None
27
28
def _has_errors(graph: IRGraph) -> bool:
    """Report whether the graph carries at least one error-severity entry.

    Entries in ``graph.errors`` with any other severity (e.g. warnings) are
    ignored.
    """
    for entry in graph.errors:
        if entry.severity == ErrorSeverity.ERROR:
            # One real error is enough; no need to look further.
            return True
    return False
32
33
def _format_pipeline_errors(graph: IRGraph, full_source: str, stage: str) -> str:
    """Build the message text for all error-severity entries on a graph.

    Args:
        graph: Graph whose ``errors`` are rendered; its ``builtin_line_offset``
            is forwarded to ``format_error`` for line adjustment.
        full_source: The source text handed to ``format_error`` for each entry.
        stage: Label used in the ``"<stage> errors:"`` heading line.

    Returns:
        The heading line followed by one formatted entry per error, joined
        with newlines. Non-error entries are left out.
    """
    line_shift = graph.builtin_line_offset
    rendered = []
    for entry in graph.errors:
        if entry.severity == ErrorSeverity.ERROR:
            rendered.append(
                format_error(entry, full_source, builtin_line_offset=line_shift)
            )
    body = "\n".join(rendered)
    return f"{stage} errors:\n" + body
43
44
def _get_parser():
    """Return the shared Lark parser, building it on first use.

    The grammar text is read from ``_GRAMMAR_PATH`` and the resulting parser
    is stored in the module-level ``_parser`` so later calls reuse it instead
    of re-reading and re-compiling the grammar.

    Returns:
        The cached ``Lark`` instance, configured with the Earley parser and
        position propagation enabled.
    """
    global _parser
    if _parser is None:
        # Decode explicitly rather than relying on the platform's locale
        # encoding, so the grammar loads the same way on every system.
        # NOTE(review): assumes dfasm.lark is stored as UTF-8 (or plain ASCII).
        grammar_text = _GRAMMAR_PATH.read_text(encoding="utf-8")
        _parser = Lark(
            grammar_text,
            parser="earley",
            propagate_positions=True,
        )
    return _parser
55
56
def run_pipeline(source: str) -> IRGraph:
    """Run the shared assembly stages and return the resulting IR graph.

    Stages, in order: parse → lower → expand → resolve → place → allocate.
    Both assemble() and assemble_to_tokens() call this before code generation.

    The built-in macro text is prepended to the user source (separated by a
    newline) before parsing, so built-in macros are available in every program
    without an explicit import. The lowered graph is tagged with the built-in
    line count so error formatting can adjust reported line numbers.

    Errors are checked at three points: after resolve, after place, and after
    allocate. The first checkpoint that finds an error-severity entry stops
    the pipeline.

    Args:
        source: dfasm source code as a string

    Returns:
        The fully processed IRGraph ready for code generation

    Raises:
        ValueError: If the graph holds error-severity entries after resolve
            ("Assembly errors"), place ("Placement errors") or allocate
            ("Allocation errors"). Exceptions raised by the parser itself are
            not caught here.
    """
    # Built-in macros go first so user code can reference them.
    full_source = "\n".join((BUILTIN_MACROS, source))

    parse_tree = _get_parser().parse(full_source)
    lowered = lower(parse_tree)
    # Remember how many lines the built-ins occupy for error reporting.
    tagged = dataclasses.replace(lowered, builtin_line_offset=_BUILTIN_LINE_COUNT)
    graph = expand(tagged)

    # Each remaining stage is followed by an error checkpoint; the label is
    # the heading used in the raised message.
    checked_stages = (
        (resolve, "Assembly"),
        (place, "Placement"),
        (allocate, "Allocation"),
    )
    for stage_fn, label in checked_stages:
        graph = stage_fn(graph)
        if _has_errors(graph):
            raise ValueError(_format_pipeline_errors(graph, full_source, label))
    return graph
92
93
def assemble(source: str) -> AssemblyResult:
    """Assemble dfasm source into a direct-mode emulator configuration.

    Runs the shared pipeline via run_pipeline()
    (parse → lower → expand → resolve → place → allocate) and then hands the
    graph to direct-mode code generation.

    Args:
        source: dfasm source code as a string

    Returns:
        AssemblyResult containing pe_configs, sm_configs, and seed_tokens

    Raises:
        ValueError: If any checked pipeline stage reports errors
    """
    return generate_direct(run_pipeline(source))
111
112
def assemble_to_tokens(source: str) -> list:
    """Assemble dfasm source into a hardware-faithful bootstrap token stream.

    Runs the shared pipeline via run_pipeline()
    (parse → lower → expand → resolve → place → allocate) and then hands the
    graph to token-mode code generation. The result is an ordered sequence:
    SM init tokens → IRAM write tokens → seed tokens, consumable by the
    emulator's System.inject() and System.load().

    Args:
        source: dfasm source code as a string

    Returns:
        List of tokens (SMToken, PELocalWriteToken, MonadToken) in bootstrap order

    Raises:
        ValueError: If any checked pipeline stage reports errors
    """
    return generate_tokens(run_pipeline(source))
131
132
def serialize_graph(graph: IRGraph) -> str:
    """Render an IRGraph as dfasm source text.

    Public wrapper around the serializer from ``asm.serialize``. It can be
    applied to a graph from any pipeline stage (after lowering, resolution,
    placement, or allocation), which makes it handy for inspecting what each
    pass did:
        graph = lower(parse(source))
        print(serialize_graph(graph))  # inspect after lowering

    Args:
        graph: The IRGraph to serialize

    Returns:
        Valid dfasm source text
    """
    dfasm_text = _serialize_graph(graph)
    return dfasm_text
148
149
def round_trip(source: str) -> str:
    """Parse and lower dfasm source, then serialize it straight back to text.

    Convenience for round-trip testing: source → parse → lower → serialize.
    The source is parsed exactly as given (built-in macros are not prepended),
    and expansion, resolution, placement, and allocation are not run.

    Args:
        source: dfasm source code as a string

    Returns:
        Valid dfasm source text

    Raises:
        Whatever the parser raises on invalid input; parse failures are not
        caught or converted here.
    """
    parser = _get_parser()
    lowered = lower(parser.parse(source))
    return _serialize_graph(lowered)
168
169
# Public names of the package.
__all__ = [
    # Assembly entry points
    "assemble", "assemble_to_tokens",
    # Pipeline and serialization helpers
    "round_trip", "run_pipeline", "serialize_graph",
    # Result type re-exported from asm.codegen
    "AssemblyResult",
]