// ============================================================================
// Dataflow Graph Assembly — Lark EBNF Grammar v0.2
//
// Parser: Earley. LALR is not usable here: macro_ref and macro_call_stmt both
// begin with "#" IDENT, which produces a reduce/reduce conflict.
// ============================================================================

// A program is a sequence of statements separated by one or more _NL tokens;
// leading and trailing _NL runs are allowed.
start: (_NL* statement)* _NL*

// "?" inlines the chosen alternative, so no `statement` node appears in the tree.
?statement: func_def
          | macro_def
          | inst_def
          | strong_edge
          | weak_edge
          | plain_edge
          | data_def
          | system_pragma
          | location_dir
          | macro_call_stmt
          | call_stmt
          | repetition_block


// ----------------------------------------------------------------------------
// Definitions
// ----------------------------------------------------------------------------

// Function / subgraph definition:
//     $name |> { body }
func_def: func_ref FLOW_OUT "{" (_NL* statement)* _NL* "}"

// Macro definition:
//     #name [param, param, ...] |> { body }
macro_def: "#" IDENT macro_params? FLOW_OUT "{" (_NL* statement)* _NL* "}"

macro_params: macro_param ("," macro_param)*

// A parameter is either variadic (prefixed with "*") or regular.
macro_param: VARIADIC IDENT -> variadic_param
           | IDENT          -> regular_param

// Marker that flags a macro parameter as variadic.
VARIADIC: "*"

// Repetition block, used inside a macro body:
//     $( body ),*
// The body is expanded once per variadic argument. Note that the closing
// "),"  is a single literal, followed by "*".
repetition_block: "$(" (_NL* statement)* _NL* ")," "*"

// Instruction definition (named node):
//     &label <| opcode [inline_const] [, arg ...]
// The optional inline_const is shorthand: "&foo <| add 7" stands for
// "&foo <| add, 7".
inst_def: qualified_ref FLOW_IN opcode inline_const? ("," argument)*

inline_const: DEC_LIT
            | HEX_LIT
            | param_ref


// ----------------------------------------------------------------------------
// Edges
// ----------------------------------------------------------------------------

// Strong inline edge (internal route, anonymous node):
//     opcode input [, input ...] |> output [, output ...]
strong_edge: opcode argument ("," argument)* FLOW_OUT ref_list

// Weak inline edge (token output, anonymous node):
//     output [, output ...] opcode <| input [, input ...]
weak_edge: ref_list opcode FLOW_IN argument ("," argument)*

// Plain edge (wiring between named nodes):
//     source |> dest [, dest ...]
plain_edge: qualified_ref FLOW_OUT ref_list


// ----------------------------------------------------------------------------
// Data, location and system configuration
// ----------------------------------------------------------------------------

// Data / initialisation:
//     ref = value [, value ...]
//     ref = #macro args
data_def: qualified_ref "=" (macro_call | value_list)

// Location directive: a bare qualified ref followed by a colon.
// Establishes the location context for the definitions that follow it.
location_dir: qualified_ref ":"

// System pragma (hardware configuration):
//     @system pe=4, sm=1, iram=128, ctx=2
system_pragma: "@system" system_param ("," system_param)*

system_param: IDENT "=" (DEC_LIT | HEX_LIT)


// ----------------------------------------------------------------------------
// Shared productions
// ----------------------------------------------------------------------------

ref_list: qualified_ref ("," qualified_ref)*


// ----------------------------------------------------------------------------
// References
// ----------------------------------------------------------------------------
// Base forms:
//     @name       node reference
//     &name       local label reference
//     $name       function / subgraph reference
//     #name       macro reference
//     $f.&x etc.  scoped reference: function or macro, ".", then label or node
//     ${name}     macro parameter reference
//
// A base may be followed by qualifiers, each at most once and in this order:
//     |ident      placement
//     [n] [a..b]  context slot (single value or range)
//     :spec       port
// Example: @sum|pe0:L  (placement + port)

qualified_ref: (node_ref | label_ref | func_ref | macro_ref | scoped_ref | param_ref) placement? ctx_slot? port?

node_ref:  "@" IDENT
label_ref: "&" IDENT
func_ref:  "$" IDENT
macro_ref: "#" IDENT

scoped_ref: (func_ref | macro_ref) "." (label_ref | node_ref)

// ${name} — macro parameter reference, substituted during macro expansion.
param_ref: PARAM_REF_START IDENT "}"
PARAM_REF_START.3: "${"

placement: "|" (IDENT | param_ref)

ctx_slot: "[" (DEC_LIT | ctx_range | param_ref) "]"
ctx_range: DEC_LIT ".." DEC_LIT

port: ":" (PORT_SPEC | param_ref)
PORT_SPEC: IDENT | HEX_LIT | DEC_LIT


// ----------------------------------------------------------------------------
// Arguments
// ----------------------------------------------------------------------------
// An argument is a literal value, a qualified ref, or a named key=value pair.
// The grammar accepts named arguments on every instruction; deciding which
// operations actually take them is left to the assembler.

?argument: named_arg
         | positional_arg

named_arg: IDENT "=" positional_arg

?positional_arg: value
               | qualified_ref
               | OPCODE
               | IDENT


// ----------------------------------------------------------------------------
// Values (literals)
// ----------------------------------------------------------------------------

?value: HEX_LIT         -> hex_literal
      | DEC_LIT         -> dec_literal
      | CHAR_LIT        -> char_literal
      | STRING_LIT      -> string_literal
      | RAW_STRING_LIT  -> raw_string_literal
      | BYTE_STRING_LIT -> byte_string_literal

value_list: value ("," value)*


// ----------------------------------------------------------------------------
// Macro invocation
// ----------------------------------------------------------------------------

// Macro call in value position (right-hand side of a data_def):
//     #name arg [arg ...]
// Arguments are juxtaposed, not comma-separated. Expansion happens in a later
// pass, not during parsing.
macro_call: "#" IDENT (value | qualified_ref)*

// Standalone macro invocation, used as a statement:
//     #name arg [, arg ...] [|> output, ...]
macro_call_stmt: "#" IDENT (argument ("," argument)*)? (FLOW_OUT call_output_list)?


// ----------------------------------------------------------------------------
// Function call
// ----------------------------------------------------------------------------
//     $func a=&x, b=&y |> @output [, name=@output2]

call_stmt: func_ref argument ("," argument)* FLOW_OUT call_output_list

call_output_list: call_output ("," call_output)*

call_output: IDENT "=" qualified_ref -> named_output
           | qualified_ref           -> positional_output


// ----------------------------------------------------------------------------
// Opcodes
// ----------------------------------------------------------------------------
// OPCODE is an exhaustive keyword terminal. Its priority of 2 makes opcodes
// win over IDENT at the lexer level. Arity (monadic/dyadic) and argument
// combinations are validated by the assembler, not by this grammar.
// An opcode position may also hold a ${param} reference.

opcode: OPCODE
      | param_ref

OPCODE.2: "add" | "sub" | "inc" | "dec"
        | "shl" | "shr" | "asr"
        | "and" | "or" | "xor" | "not"
        | "eq" | "lt" | "lte" | "gt" | "gte"
        | "breq" | "brgt" | "brge" | "brof" | "brty"
        | "sweq" | "swgt" | "swge" | "swof" | "swty"
        | "gate" | "sel" | "merge" | "pass" | "const"
        | "free_frame" | "extract_tag" | "alloc_remote"
        | "read" | "write" | "clear" | "exec" | "alloc" | "free"
        | "rd_inc" | "rd_dec" | "cmp_sw"
        | "ior" | "iow" | "iorw"
        | "load_inst" | "route_set"


// ----------------------------------------------------------------------------
// Flow operators
// ----------------------------------------------------------------------------
// Priority 3 so that they beat any partial match of "|", "<" or ">".

FLOW_IN.3:  "<|"
FLOW_OUT.3: "|>"


// ----------------------------------------------------------------------------
// Terminals
// ----------------------------------------------------------------------------

HEX_LIT: /0x[0-9a-fA-F]+/
DEC_LIT: /[0-9]+/

// Character literal: one plain character or one escape sequence.
// Recognised escapes: \n \t \r \0 \\ \' \xNN
CHAR_LIT: /'([^'\\]|\\[ntr0\\']|\\x[0-9a-fA-F]{2})'/

// String literals, following Rust-style conventions:
//     "..."    regular string; escape sequences are processed by the assembler
//     r"..."   raw string; no escape processing
//     b"..."   byte string; differs only semantically (raw byte values)
// Strings may span multiple lines: the /s flag lets "." match a newline.
STRING_LIT:      /\"([^\"\\]|\\.)*\"/s
RAW_STRING_LIT:  /r\"[^\"]*\"/s
BYTE_STRING_LIT: /b\"([^\"\\]|\\.)*\"/s

IDENT: /[a-zA-Z_][a-zA-Z0-9_]*/


// ----------------------------------------------------------------------------
// Whitespace & comments
// ----------------------------------------------------------------------------
// ";" opens a comment that runs to the end of the line (traditional asm style).
// Newlines are significant: they separate statements.

COMMENT: /;[^\n]*/

// One statement separator is any non-empty run of newlines and/or comments.
_NL: (NEWLINE | COMMENT)+

%import common.NEWLINE
%import common.WS_INLINE

%ignore WS_INLINE
%ignore COMMENT