// Dataflow Graph Assembly — Lark EBNF Grammar v0.2
// Parser: Earley (LALR blocked by macro_ref vs macro_call_stmt reduce/reduce conflict: both are #IDENT prefix)

// Entry rule: zero or more statements, each optionally preceded by newline
// tokens, followed by optional trailing newlines. Because _NL* also matches
// zero newlines, this rule by itself does not force a line break between two
// consecutive statements.
start: (_NL* statement)* _NL*

// Every construct that may appear at the top level or inside a { } / $( )
// body. The leading "?" tells Lark to inline this rule when it has a single
// child; every alternative here is a single rule, so the tree holds the
// concrete node (func_def, inst_def, ...) rather than a "statement" wrapper.
?statement: func_def
          | macro_def
          | inst_def
          | strong_edge
          | weak_edge
          | plain_edge
          | data_def
          | system_pragma
          | location_dir
          | macro_call_stmt
          | call_stmt
          | repetition_block

// --- Function / subgraph definition ---
// $name |> { body }
// The body between the braces uses the same statement/newline pattern as the
// start rule, so every statement kind (including nested definitions) is
// accepted syntactically.
func_def: func_ref FLOW_OUT "{" (_NL* statement)* _NL* "}"

// --- Macro definition ---
// #name [param, param, ...] |> { body }
// (Square brackets in the synopsis mark the optional parameter list; they are
// not literal tokens.) The body accepts the same statements as the top level.
macro_def: "#" IDENT macro_params? FLOW_OUT "{" (_NL* statement)* _NL* "}"
// One or more comma-separated parameters.
macro_params: macro_param ("," macro_param)*
// A parameter is either variadic (*name) or regular (name); the aliases give
// the two forms distinct tree node names. The grammar does not limit how many
// variadic parameters appear or where they appear in the list.
macro_param: VARIADIC IDENT -> variadic_param
           | IDENT -> regular_param

// Variadic marker for macro parameters
VARIADIC: "*"

// --- Repetition block in macro body ---
// $( body ),* expands body once per variadic argument
// The body uses the same statement/newline pattern as the top level.
// The closing ")", the "," and the "*" are three separate tokens, so inline
// whitespace between any of them (e.g. "$( ... ) , *") is accepted, matching
// how whitespace is ignored between tokens elsewhere in the grammar. The
// compact spelling "),*" parses exactly as before.
// NOTE(review): this rule is listed under statement, so the grammar accepts it
// outside macro bodies too; presumably a later pass rejects that - confirm.
repetition_block: "$(" (_NL* statement)* _NL* ")" "," "*"

// --- Instruction definition (named node) ---
// &label <| opcode [inline_const] [, arg ...]
// inline_const allows e.g. "&foo <| add 7" as shorthand for "&foo <| add, 7"
// The left-hand side is any qualified_ref, so it may be another reference
// kind than &label and may carry placement, context-slot and port qualifiers.
inst_def: qualified_ref FLOW_IN opcode inline_const? ("," argument)*

// The inline constant is limited to an unsigned decimal or hex literal, or a
// ${param} reference; other value kinds have to be passed in the comma form.
inline_const: DEC_LIT | HEX_LIT | param_ref

// --- Strong inline edge (internal route, anonymous node) ---
// opcode input [, input ...] |> output [, output ...]
// At least one input argument and at least one output reference are required.
strong_edge: opcode argument ("," argument)* FLOW_OUT ref_list

// --- Weak inline edge (token output, anonymous node) ---
// output [, output ...] opcode <| input [, input ...]
// Mirror image of strong_edge: the output references come first, followed by
// the opcode, "<|" and at least one input argument.
weak_edge: ref_list opcode FLOW_IN argument ("," argument)*

// --- Plain edge (wiring between named nodes) ---
// source |> dest [, dest ...]
// Exactly one source reference, one or more destination references.
plain_edge: qualified_ref FLOW_OUT ref_list

// --- Data / initialisation ---
// ref = value [, value ...] | ref = #macro args
// The right-hand side is either a macro_call or a comma-separated list of
// literal values; references are not accepted inside value_list.
data_def: qualified_ref "=" (macro_call | value_list)

// --- Location directive (bare qualified ref with trailing colon) ---
// Sets location context for subsequent definitions.
// The trailing ":" is the same character that introduces a port qualifier on
// a qualified_ref; a port needs a PORT_SPEC or ${param} directly after the
// colon, whereas here the colon ends the statement.
location_dir: qualified_ref ":"

// --- System pragma (hardware configuration) ---
// @system pe=4, sm=1, iram=128, ctx=2
// One or more comma-separated key=value settings.
system_pragma: "@system" system_param ("," system_param)*
// Keys are free-form identifiers at the grammar level; values are restricted
// to decimal or hex literals.
system_param: IDENT "=" (DEC_LIT | HEX_LIT)

// === Shared productions ===

// One or more comma-separated qualified references; used for the output side
// of strong/weak edges and the destination side of plain edges.
ref_list: qualified_ref ("," qualified_ref)*

// === References ===
// A base reference followed by optional qualifiers. Each qualifier may appear
// at most once and only in this order: placement (|ident), context slot
// ([n] or [a..b]), port (:spec).
// @name        — node reference
// &name        — local label reference
// $name        — function / subgraph reference
// #name        — macro reference
// $name.&label — scoped reference (function or macro, then label or node)
// ${name}      — macro parameter reference
// Chaining: @sum|pe0:L     (placement + port)
//           @sum|pe0[2]:L  (placement + context slot + port)

qualified_ref: (node_ref | label_ref | func_ref | macro_ref | scoped_ref | param_ref) placement? ctx_slot? port?

node_ref:  "@" IDENT
label_ref: "&" IDENT
func_ref:  "$" IDENT
macro_ref: "#" IDENT

// Scope is a function or macro reference; the member after "." is a label or
// node reference. Only a single level of scoping is expressible.
scoped_ref: (func_ref | macro_ref) "." (label_ref | node_ref)

// ${name} — macro parameter reference (substituted during expansion)
// Usable wherever the grammar lists param_ref: as a base reference, as an
// opcode, as an inline constant, and inside placement / ctx_slot / port.
param_ref: PARAM_REF_START IDENT "}"
// Priority 3, the same as the flow operators; presumably so that "${" is
// preferred over the bare "$" sigil of func_ref — confirm.
PARAM_REF_START.3: "${"

// Qualifiers that may follow a base reference inside qualified_ref.
// placement: "|" then an identifier or ${param}, e.g. |pe0
// ctx_slot:  "[" then a decimal index, a decimal range a..b, or ${param}
// port:      ":" then a port spec or ${param}, e.g. :L
placement: "|" (IDENT | param_ref)
ctx_slot:  "[" (DEC_LIT | ctx_range | param_ref) "]"
ctx_range: DEC_LIT ".." DEC_LIT
port:      ":" (PORT_SPEC | param_ref)

// A port spec is a single token that is an identifier, a hex literal or a
// decimal literal.
PORT_SPEC: IDENT | HEX_LIT | DEC_LIT

// === Arguments ===
// An argument is either positional or a named key=value pair.
// A positional argument is a literal value, a qualified ref, a bare opcode
// keyword, or a bare identifier.
// Named args are syntactically valid on any instruction.
// Semantic validation (which ops accept named args) is deferred to the assembler.
// The value of a named arg is a positional_arg, so named args do not nest.

?argument: named_arg | positional_arg
named_arg: IDENT "=" positional_arg
?positional_arg: value | qualified_ref | OPCODE | IDENT

// === Values (literals) ===
// Each alternative is aliased, so the tree node is named after the literal
// kind (hex_literal, dec_literal, ...) and wraps the matched token.

?value: HEX_LIT         -> hex_literal
      | DEC_LIT         -> dec_literal
      | CHAR_LIT        -> char_literal
      | STRING_LIT      -> string_literal
      | RAW_STRING_LIT  -> raw_string_literal
      | BYTE_STRING_LIT -> byte_string_literal

// One or more comma-separated literal values (used by data_def).
value_list: value ("," value)*

// === Macros ===
// #name arg [arg ...] — expanded in a later pass, not during parsing.
// This form is the one used on the right-hand side of data_def. Its arguments
// are literal values or qualified refs written one after another WITHOUT
// commas; zero arguments are allowed.

macro_call: "#" IDENT (value | qualified_ref)*

// #name arg [, arg ...] [|> output, ...] — standalone macro invocation (as statement)
// Unlike macro_call, arguments here are comma-separated and may be named
// (key=value); both the argument list and the output list are optional.
// NOTE(review): "#name |> @x" also matches plain_edge (macro_ref as source),
// so that input has two derivations; which one the Earley parser returns
// depends on its ambiguity resolution — confirm the assembler expects it.
macro_call_stmt: "#" IDENT (argument ("," argument)*)? (FLOW_OUT call_output_list)?

// --- Function call ---
// $func a=&x, b=&y |> @output [, name=@output2]
// At least one argument is required, and the "|>" output list is mandatory.
call_stmt: func_ref argument ("," argument)* FLOW_OUT call_output_list

// One or more comma-separated outputs; named (name=ref) and positional (ref)
// outputs may be mixed freely at the grammar level.
call_output_list: call_output ("," call_output)*
call_output: IDENT "=" qualified_ref    -> named_output
           | qualified_ref               -> positional_output

// === Opcodes ===
// Exhaustive keyword terminal. Priority 2 ensures opcodes win over IDENT
// at the lexer level. Semantic validation (monadic/dyadic arity, valid
// argument combinations) is deferred to the assembler.
// In opcode position a ${param} reference is accepted as well, so a macro
// can take the operation itself as a parameter.

opcode: OPCODE | param_ref

OPCODE.2: "add" | "sub" | "inc" | "dec"
        | "shl" | "shr" | "asr"
        | "and" | "or" | "xor" | "not"
        | "eq" | "lt" | "lte" | "gt" | "gte"
        | "breq" | "brgt" | "brge" | "brof" | "brty"
        | "sweq" | "swgt" | "swge" | "swof" | "swty"
        | "gate" | "sel" | "merge"
        | "pass" | "const" | "free_frame" | "extract_tag" | "alloc_remote"
        | "read" | "write" | "clear" | "exec" | "alloc" | "free" | "rd_inc" | "rd_dec" | "cmp_sw"
        | "ior" | "iow" | "iorw"
        | "load_inst" | "route_set"

// === Flow operators ===
// Priority 3 to win over any partial match of | or < or >
// (for example the "|" that introduces a placement qualifier).
// FLOW_IN  "<|" is used by inst_def and weak_edge.
// FLOW_OUT "|>" is used by definitions, strong/plain edges and calls.

FLOW_IN.3:  "<|"
FLOW_OUT.3: "|>"

// === Terminals ===
// Numeric literals are unsigned: neither pattern accepts a sign.
// HEX_LIT requires a lowercase "0x" prefix followed by one or more hex digits
// (either case). DEC_LIT is one or more decimal digits.

HEX_LIT:  /0x[0-9a-fA-F]+/
DEC_LIT:  /[0-9]+/

// Character literals: single char or escape sequence.
// Supported escapes: \n \t \r \0 \\ \' \xNN
// \xNN takes exactly two hex digits. An unescaped character may be anything
// except a single quote or a backslash.
CHAR_LIT: /'([^'\\]|\\[ntr0\\']|\\x[0-9a-fA-F]{2})'/

// String literals — Rust-style semantics.
//   "..."   regular string, escape sequences processed by assembler
//   r"..."  raw string, no escape processing
//   b"..."  byte string, semantic difference only (raw byte values)
// Multi-line strings are permitted: the negated character classes already
// match newlines. The /s flag additionally lets the "." in the escape branch
// (\\.) match a newline, i.e. a backslash directly followed by a line break.
// RAW_STRING_LIT contains no "." so the flag has no effect on it; a raw
// string also cannot contain a double quote, since there is no escape for it.
STRING_LIT:      /\"([^\"\\]|\\.)*\"/s
RAW_STRING_LIT:  /r\"[^\"]*\"/s
BYTE_STRING_LIT: /b\"([^\"\\]|\\.)*\"/s

// Identifier: ASCII letter or underscore, then ASCII letters, digits or
// underscores. Opcode keywords also match this pattern; OPCODE carries a
// higher priority to be preferred where both apply.
IDENT: /[a-zA-Z_][a-zA-Z0-9_]*/

// === Whitespace & Comments ===
// ; starts a comment to end of line (traditional asm behaviour).
// Newlines are significant as statement separators.
// _NL is one or more newlines and/or comments; the leading underscore keeps
// the token out of the parse tree.
// COMMENT is both a component of _NL and listed under %ignore, so a comment
// is accepted whether or not the grammar expects an _NL at that position.
// Inline whitespace (WS_INLINE) is ignored everywhere; newlines are not.

COMMENT: /;[^\n]*/
_NL: (NEWLINE | COMMENT) (NEWLINE | COMMENT)*

%import common.NEWLINE
%import common.WS_INLINE
%ignore WS_INLINE
%ignore COMMENT