"""Intermediate Representation (IR) types for OR1 assembly. Frozen dataclasses define the IR node types that represent a lowered assembly program as a graph with nodes, edges, regions, and data definitions. This module follows the patterns established in tokens.py and cm_inst.py. """ from __future__ import annotations from dataclasses import dataclass, field, replace from enum import Enum from typing import TYPE_CHECKING, Iterator, Optional, Union from cm_inst import ALUOp, FrameDest, MemOp, Port if TYPE_CHECKING: from asm.errors import AssemblyError # Default configuration values for system parameters DEFAULT_IRAM_CAPACITY = 256 DEFAULT_FRAME_COUNT = 8 DEFAULT_FRAME_SLOTS = 64 DEFAULT_MATCHABLE_OFFSETS = 8 @dataclass(frozen=True) class SourceLoc: """Source location for error reporting. Extracted from Lark's meta object during parsing. """ line: int column: int end_line: Optional[int] = None end_column: Optional[int] = None @dataclass(frozen=True) class NameRef: """Unresolved symbolic reference to a node or label. Attributes: name: The symbolic name (e.g., "&label" or "@node") port: Optional port specification (L or R) """ name: str port: Optional[Port] = None @dataclass(frozen=True) class ResolvedDest: """Fully resolved destination after name resolution. Attributes: name: The qualified name addr: Deprecated (set to None in frame-based model) frame_dest: The resolved FrameDest with target PE, offset, act_id, port, and token kind """ name: str addr: Optional[object] = None frame_dest: Optional[FrameDest] = None @dataclass(frozen=True) class IRNode: """One instruction in the IR graph. Represents a single instruction that may be executed by a PE. Can be a dyadic ALU operation, a monadic routing operation, or a memory (SM) operation. Attributes: name: Qualified name (e.g., "$main.&add" or "&top_level") or ParamRef for macro templates opcode: ALUOp or MemOp enum value dest_l: Left output destination (before name resolution) dest_r: Right output destination (before name resolution) const: Optional constant operand (int, ParamRef, or ConstExpr) pe: Optional PE placement qualifier iram_offset: Optional offset in PE's IRAM (populated during allocation) act_slot: Optional activation slot (populated during allocation) act_id: Optional activation ID (populated during allocation) mode: Optional output mode tuple (OutputStyle, has_const, dest_count) — set by allocate fref: Optional frame slot base index — set by allocate wide: Wide operation flag frame_layout: Optional frame slot map — set by allocate loc: Source location for error reporting args: Optional named arguments dictionary (e.g., {"dest": 0x45}) sm_id: Optional SM ID for MemOp instructions (populated during lowering) """ name: Union[str, ParamRef] opcode: Union[ALUOp, MemOp, ParamRef] dest_l: Optional[Union[NameRef, ResolvedDest]] = None dest_r: Optional[Union[NameRef, ResolvedDest]] = None const: Optional[Union[int, ParamRef, ConstExpr]] = None pe: Optional[Union[int, PlacementRef]] = None act_slot: Optional[Union[int, ActSlotRef, ActSlotRange]] = None iram_offset: Optional[int] = None act_id: Optional[int] = None mode: Optional[tuple] = None fref: Optional[int] = None wide: bool = False frame_layout: Optional[FrameLayout] = None loc: SourceLoc = SourceLoc(0, 0) args: Optional[dict[str, int]] = None sm_id: Optional[int] = None seed: bool = False @dataclass(frozen=True) class IREdge: """Connection between two IR nodes. Attributes: source: Name of the source node (str or ParamRef for macro templates) dest: Name of the destination node (str or ParamRef for macro templates) port: Destination input port (L or R) source_port: Source output slot (L or R); None means allocator infers it port_explicit: Whether the destination port was explicitly specified by the user ctx_override: Whether this edge crosses context boundaries (function calls) loc: Source location for error reporting """ source: Union[str, ParamRef] dest: Union[str, ParamRef] port: Union[Port, PortRef] source_port: Optional[Union[Port, PortRef]] = None port_explicit: bool = False ctx_override: bool = False loc: SourceLoc = SourceLoc(0, 0) class RegionKind(Enum): """Kind of IR region (nested scope).""" FUNCTION = "function" LOCATION = "location" MACRO = "macro" @dataclass(frozen=True) class IRDataDef: """Data definition (initialization in structure memory). Attributes: name: Name of the data (e.g., "@hello") sm_id: Optional SM ID (populated from placement during lowering) cell_addr: Optional cell address (populated from port during lowering) value: 16-bit value to store (big-endian packed for multi-char data) loc: Source location for error reporting """ name: str sm_id: Optional[int] = None cell_addr: Optional[int] = None value: int = 0 loc: SourceLoc = SourceLoc(0, 0) @dataclass(frozen=True) class MacroParam: """Formal parameter in a macro definition. Attributes: name: Parameter name (without sigil) variadic: Whether this is a variadic parameter (*name), which collects remaining args """ name: str variadic: bool = False @dataclass(frozen=True) class ParamRef: """Placeholder for a macro parameter within a template IR. Used in macro body templates to mark where actual arguments should be substituted during expansion. Supports token pasting via optional prefix/suffix strings. Attributes: param: Formal parameter name this references prefix: Optional string prepended during token pasting suffix: Optional string appended during token pasting """ param: str prefix: str = "" suffix: str = "" @dataclass(frozen=True) class PlacementRef: """Deferred placement from macro parameter.""" param: ParamRef @dataclass(frozen=True) class PortRef: """Deferred port from macro parameter.""" param: ParamRef @dataclass(frozen=True) class ActSlotRef: """Deferred activation slot from macro parameter.""" param: ParamRef @dataclass(frozen=True) class ActSlotRange: """Explicit activation slot range reservation.""" start: int end: int @dataclass(frozen=True) class FrameSlotMap: """Slot map for a frame layout. Attributes: match_slots: Offsets of match operand slots const_slots: Offsets of constant slots dest_slots: Offsets of destination slots sink_slots: Offsets of sink/SM parameter slots """ match_slots: tuple[int, ...] const_slots: tuple[int, ...] dest_slots: tuple[int, ...] sink_slots: tuple[int, ...] @dataclass(frozen=True) class FrameLayout: """Frame slot layout for an activation. Attributes: slot_map: The frame slot map total_slots: Total number of slots used """ slot_map: FrameSlotMap total_slots: int @dataclass(frozen=True) class IRRepetitionBlock: """A repetition block within a macro body template. The body is expanded once per variadic argument during macro expansion. Each iteration binds the variadic param to the current element and ${_idx} to the iteration index. Attributes: body: Template IRGraph for the repeating section variadic_param: Name of the variadic parameter this iterates over loc: Source location for error reporting """ body: IRGraph variadic_param: str loc: SourceLoc = SourceLoc(0, 0) @dataclass(frozen=True) class ConstExpr: """Arithmetic expression in macro body constant field. Evaluated during expansion when parameter values are known. Supports +, -, * on integer-valued parameters and literals. Attributes: expression: Expression source string, e.g. "base + 1" params: Parameter names referenced in the expression loc: Source location for error reporting """ expression: str params: tuple[str, ...] loc: SourceLoc = SourceLoc(0, 0) @dataclass(frozen=True) class MacroDef: """A macro definition: name, parameters, and body template. The body IRGraph may contain ParamRef instances in node const fields and edge source/dest fields. These are resolved during macro expansion (Phase 2). Attributes: name: Macro name (without # sigil) params: Ordered tuple of formal parameters body: Template IRGraph with ParamRef placeholders repetition_blocks: List of repetition blocks in the body (Phase 6) loc: Source location for error reporting """ name: str params: tuple[MacroParam, ...] body: IRGraph repetition_blocks: list[IRRepetitionBlock] = field(default_factory=list) loc: SourceLoc = SourceLoc(0, 0) @dataclass(frozen=True) class IRMacroCall: """A macro invocation in the IR. Stored in IRGraph.macro_calls. Processed and removed by the expand pass (Phase 2). Attributes: name: Macro name being invoked (without # sigil) positional_args: Positional argument values named_args: Named argument key-value pairs loc: Source location for error reporting """ name: str positional_args: tuple = () named_args: tuple[tuple[str, object], ...] = () output_dests: tuple = () loc: SourceLoc = SourceLoc(0, 0) @dataclass(frozen=True) class CallSiteResult: """Intermediate call site data from lower pass, consumed by expand pass. Attributes: func_name: Name of the called function (e.g., "$fib") input_args: Tuple of (param_name, source_ref) pairs output_dests: Tuple of output destinations (positional or named) loc: Source location for error reporting """ func_name: str input_args: tuple[tuple[str, str], ...] = () output_dests: tuple = () loc: SourceLoc = SourceLoc(0, 0) @dataclass(frozen=True) class CallSite: """Metadata for a function call site. Generated by the expand pass when processing call_stmt syntax. Used by the allocator for per-call-site context slot assignment. Attributes: func_name: Name of the called function (e.g., "$fib") call_id: Unique call site identifier (counter) input_edges: Edge names for cross-context inputs trampoline_nodes: Names of generated trampoline pass nodes free_frame_nodes: Names of generated free_frame nodes loc: Source location of the call """ func_name: str call_id: int input_edges: tuple[str, ...] = () trampoline_nodes: tuple[str, ...] = () free_frame_nodes: tuple[str, ...] = () loc: SourceLoc = SourceLoc(0, 0) @dataclass(frozen=True) class SystemConfig: """System configuration from @system pragma. Attributes: pe_count: Number of processing elements sm_count: Number of structure memory instances iram_capacity: IRAM size per PE (default 256) frame_count: Number of frames per PE (default 8) frame_slots: Total slots per frame (default 64) matchable_offsets: Number of matchable IRAM offsets per frame (default 8) loc: Source location for error reporting """ pe_count: int sm_count: int iram_capacity: int = DEFAULT_IRAM_CAPACITY frame_count: int = DEFAULT_FRAME_COUNT frame_slots: int = DEFAULT_FRAME_SLOTS matchable_offsets: int = DEFAULT_MATCHABLE_OFFSETS loc: SourceLoc = SourceLoc(0, 0) @dataclass(frozen=True) class IRRegion: """Nested scope (function or location region). Attributes: tag: Name of the region (e.g., "$main" or "@data_section") kind: Type of region (FUNCTION or LOCATION) body: IRGraph containing statements within this region loc: Source location for error reporting """ tag: str kind: RegionKind body: IRGraph loc: SourceLoc = SourceLoc(0, 0) @dataclass(frozen=True) class IRGraph: """Complete IR representation of an assembly program or region. This is the primary data structure produced by the Lower pass. It contains all nodes, edges, nested regions, and data definitions. Macro definitions and invocations are stored separately for processing by the expand pass. Note: IRGraph is frozen but holds mutable containers. This follows the PEConfig pattern: each pass returns a new IRGraph, and containers are never mutated after construction. Attributes: nodes: Dictionary of IRNodes keyed by qualified name edges: List of IREdges connecting nodes regions: List of IRRegions (nested scopes) data_defs: List of IRDataDefs (memory initialization) system: Optional SystemConfig from @system pragma errors: List of AssemblyErrors encountered during lowering macro_defs: List of MacroDefs (macro definitions before expansion) macro_calls: List of IRMacroCalls (macro invocations to be expanded) raw_call_sites: Tuple of CallSiteResults from lower pass call_sites: List of CallSites (processed by expand pass) builtin_line_offset: Number of lines in prepended built-in macros (for error reporting) """ nodes: dict[str, IRNode] = field(default_factory=dict) edges: list[IREdge] = field(default_factory=list) regions: list[IRRegion] = field(default_factory=list) data_defs: list[IRDataDef] = field(default_factory=list) system: Optional[SystemConfig] = None errors: list[AssemblyError] = field(default_factory=list) macro_defs: list[MacroDef] = field(default_factory=list) macro_calls: list[IRMacroCall] = field(default_factory=list) raw_call_sites: tuple[CallSiteResult, ...] = () call_sites: list[CallSite] = field(default_factory=list) builtin_line_offset: int = 0 def iter_all_subgraphs(graph: IRGraph) -> Iterator[IRGraph]: """Iterate over a graph and all nested region body graphs recursively. Yields the graph itself first, then all graphs in nested regions in depth-first order. Args: graph: The root IRGraph Yields: IRGraph objects from the hierarchy """ yield graph def _walk_regions(regions: list[IRRegion]) -> Iterator[IRGraph]: for region in regions: yield region.body yield from _walk_regions(region.body.regions) yield from _walk_regions(graph.regions) def collect_all_nodes(graph: IRGraph) -> dict[str, IRNode]: """Collect all nodes from graph and regions recursively. Args: graph: The IRGraph Returns: Dictionary mapping node names to IRNodes """ all_nodes = {} for subgraph in iter_all_subgraphs(graph): all_nodes.update(subgraph.nodes) return all_nodes def collect_all_nodes_and_edges(graph: IRGraph) -> tuple[dict[str, IRNode], list[IREdge]]: """Collect all nodes and edges from graph and regions recursively. Args: graph: The IRGraph Returns: Tuple of (all_nodes dict, all_edges list) """ all_nodes = {} all_edges = [] for subgraph in iter_all_subgraphs(graph): all_nodes.update(subgraph.nodes) all_edges.extend(subgraph.edges) return all_nodes, all_edges def collect_all_data_defs(graph: IRGraph) -> list[IRDataDef]: """Collect all data_defs from graph and regions recursively. Args: graph: The IRGraph Returns: List of all IRDataDef objects """ all_defs = [] for subgraph in iter_all_subgraphs(graph): all_defs.extend(subgraph.data_defs) return all_defs def update_graph_nodes( graph: IRGraph, updated_nodes: dict[str, IRNode] ) -> IRGraph: """Recursively update nodes in graph and regions. This function traverses the graph structure and replaces nodes with updated versions from the provided dictionary. It preserves the tree structure of regions and regions-within-regions. Args: graph: The IRGraph to update updated_nodes: Dictionary mapping node names to updated IRNode instances Returns: New IRGraph with updated nodes """ # Update top-level nodes new_g_nodes = {} for name, node in graph.nodes.items(): new_g_nodes[name] = updated_nodes.get(name, node) # Update regions recursively new_regions = [] for region in graph.regions: new_body = update_graph_nodes(region.body, updated_nodes) new_regions.append(replace(region, body=new_body)) return replace(graph, nodes=new_g_nodes, regions=new_regions)