OR-1 dataflow CPU sketch

fix: function call routing — input trampolines use INHERIT, assign act_ids to function body nodes

Two root causes for the last 3 test failures:

1. Input edges (both the trampoline -> target edge and the direct,
no-conflict edge) incorrectly set ctx_override=True, which selects
CHANGE_TAG mode. CHANGE_TAG unpacks the left operand as a packed FrameDest,
but input trampolines receive raw data values — not FrameDests. Changed
input edges to use normal routing (INHERIT mode reads the dest from the
frame slot, which already encodes the function's act_id).

2. Function body nodes (e.g. $adder.&a, $adder.&sum) got act_id=0 (root
scope) instead of the call site's act_id. When a function has call sites,
its body nodes must share the call site's activation context so FrameDest
routing targets the correct frame.

Also fixed test_builtins.py and test_variadic.py to inject setup_tokens
before seed_tokens (matching test_e2e.py's pattern).

Orual ca34bfeb 2565cc17

+45 -21
+13 -2
asm/allocate.py
··· 498 498 updated_nodes = {} 499 499 500 500 # Build global mapping of which nodes belong to which call sites 501 + # Trampoline and free_frame nodes get the call site's act_id 501 502 callsite_for_node = {} # node_name -> CallSite 502 503 for call_site in call_sites: 503 504 for tramp_node in call_site.trampoline_nodes: 504 505 callsite_for_node[tramp_node] = call_site 505 506 for free_node in call_site.free_frame_nodes: 506 507 callsite_for_node[free_node] = call_site 508 + 509 + # Build mapping: function scope -> call site (for function body nodes) 510 + func_scope_to_callsite = {} # func_name -> CallSite 511 + for call_site in call_sites: 512 + func_scope_to_callsite[call_site.func_name] = call_site 507 513 508 514 # Allocate activation IDs for this PE 509 515 next_act_id = 0 ··· 608 614 call_site = callsite_for_node[node.name] 609 615 act_id_value = call_site_to_act_id_on_pe.get(call_site) 610 616 611 - # If not part of a call site, use function scope or root scope 617 + # If not part of a call site, check if it's a function body node 612 618 if act_id_value is None: 613 619 scope = _extract_function_scope(node.name) 614 - act_id_value = scope_to_act_id.get(scope, root_act_id) 620 + if scope in func_scope_to_callsite: 621 + # Function body node — gets the call site's act_id 622 + cs = func_scope_to_callsite[scope] 623 + act_id_value = call_site_to_act_id_on_pe.get(cs) 624 + if act_id_value is None: 625 + act_id_value = scope_to_act_id.get(scope, root_act_id) 615 626 616 627 updated_nodes[node.name] = replace(node, act_id=act_id_value) 617 628
+6 -4
asm/expand.py
··· 1032 1032 ) 1033 1033 wired_edges.append(source_to_tramp) 1034 1034 1035 - # Wire: trampoline -> target (with ctx_override) 1035 + # Wire: trampoline -> target (no ctx_override — INHERIT mode reads 1036 + # the destination FrameDest from the frame, which already encodes 1037 + # the function's act_id. CHANGE_TAG is wrong here because the left 1038 + # operand is raw data, not a packed FrameDest.) 1036 1039 tramp_to_target = IREdge( 1037 1040 source=tramp_name, 1038 1041 dest=target_label, 1039 1042 port=Port.L, 1040 - ctx_override=True, 1041 1043 loc=call_site.loc, 1042 1044 ) 1043 1045 wired_edges.append(tramp_to_target) 1044 1046 else: 1045 - # No conflict — direct edge with ctx_override 1047 + # No conflict — direct edge (no ctx_override — the destination 1048 + # node's act_id in the FrameDest handles cross-context routing) 1046 1049 input_edge = IREdge( 1047 1050 source=source_name, 1048 1051 dest=target_label, 1049 1052 port=Port.L, 1050 - ctx_override=True, 1051 1053 loc=call_site.loc, 1052 1054 ) 1053 1055 wired_edges.append(input_edge)
+5 -1
tests/test_builtins.py
··· 56 56 env = simpy.Environment() 57 57 sys = build_topology(env, result.pe_configs, result.sm_configs) 58 58 59 - # Inject seed tokens 59 + # Inject setup tokens first (frame/IRAM initialization) 60 + for setup in result.setup_tokens: 61 + sys.inject(setup) 62 + 63 + # Then inject seed tokens 60 64 for seed in result.seed_tokens: 61 65 sys.inject(seed) 62 66
+19 -14
tests/test_call_wiring.py
··· 385 385 assert len(call_site.free_frame_nodes) > 0 386 386 387 387 388 - def test_input_edges_have_ctx_override(): 389 - """Input edges from call site to function parameters have ctx_override=True. 388 + def test_input_edges_use_inherit_not_ctx_override(): 389 + """Input edges from call site to function parameters use INHERIT, not CHANGE_TAG. 390 390 391 - When source node has a const, a pass trampoline is inserted to avoid 392 - the const+CTX_OVRD conflict (AC5.3). The ctx_override edge is then 393 - from the trampoline to the function parameter. 391 + In the frame-based model, cross-context routing for input edges is handled by 392 + the FrameDest's act_id (which differs between caller and function activation). 393 + Only return trampolines use ctx_override/CHANGE_TAG because they decode a 394 + packed FrameDest from EXTRACT_TAG. 394 395 """ 395 396 source = """ 396 397 $add |> { ··· 406 407 graph = parse_lower_expand(source) 407 408 408 409 # Since &x has const=5, a trampoline is inserted: &x -> trampoline -> $add.&a 409 - # The ctx_override edge is from the trampoline to $add.&a 410 - ctx_override_to_a = False 410 + # Neither edge should have ctx_override — input routing uses INHERIT mode 411 411 for edge in graph.edges: 412 - if "$add.&a" in str(edge.dest) and edge.ctx_override: 413 - ctx_override_to_a = True 414 - break 412 + if "$add.&a" in str(edge.dest): 413 + assert not edge.ctx_override, ( 414 + f"Input edge to $add.&a should NOT have ctx_override " 415 + f"(frame-based routing uses INHERIT with act_id in FrameDest)" 416 + ) 415 417 416 - assert ctx_override_to_a, ( 417 - f"Expected ctx_override edge targeting $add.&a, " 418 - f"edges: {[(e.source, e.dest, e.ctx_override) for e in graph.edges]}" 419 - ) 418 + # Only return trampoline edges should have ctx_override 419 + ret_ctx_override = [e for e in graph.edges if e.ctx_override] 420 + assert len(ret_ctx_override) > 0, "Return trampoline should have ctx_override" 421 + for e in ret_ctx_override: 422 
+ assert "__ret_trampoline" in e.source, ( 423 + f"Only return trampolines should have ctx_override, got: {e.source} -> {e.dest}" 424 + ) 420 425 421 426 422 427 def test_shared_function_body():
+2
tests/test_variadic.py
··· 506 506 507 507 env = simpy.Environment() 508 508 sys = build_topology(env, result.pe_configs, result.sm_configs) 509 + for setup in result.setup_tokens: 510 + sys.inject(setup) 509 511 for seed in result.seed_tokens: 510 512 sys.inject(seed) 511 513 env.run(until=500)