An Elixir implementation of AT Protocol-flavoured Merkle Search Trees (MST)
at main 808 lines 29 kB view raw
1defmodule MST.Tree do 2 @moduledoc """ 3 An in-memory Merkle Search Tree. 4 5 `MST.Tree` is the primary interface for building and querying MSTs. It pairs 6 a root CID (or `nil` for an empty tree) with a block store that maps CIDs to 7 decoded `MST.Node` structs. 8 9 All mutation operations (`put/3`, `delete/3`) return a new `MST.Tree` — 10 the data structure is persistent/immutable. The underlying store accumulates 11 all written nodes across mutations; stale nodes are not removed (no GC). 12 13 ## Example 14 15 store = MST.Store.Memory.new() 16 tree = MST.Tree.new(store) 17 18 val = DASL.CID.compute("my record data") 19 {:ok, tree} = MST.Tree.put(tree, "collection/record-key", val) 20 {:ok, ^val} = MST.Tree.get(tree, "collection/record-key") 21 22 {:ok, tree} = MST.Tree.delete(tree, "collection/record-key") 23 {:error, :not_found} = MST.Tree.get(tree, "collection/record-key") 24 25 """ 26 27 use TypedStruct 28 import Kernel, except: [length: 1] 29 30 alias DASL.CID 31 alias MST.{Height, Node, Node.Entry, Store} 32 33 @type store() :: Store.t() 34 @type tree_error() :: {:error, atom()} 35 36 typedstruct enforce: true do 37 field :root, CID.t() | nil 38 field :store, store() 39 end 40 41 # --------------------------------------------------------------------------- 42 # Construction 43 # --------------------------------------------------------------------------- 44 45 @doc """ 46 Returns a new, empty tree backed by the given store. 47 48 ## Examples 49 50 iex> tree = MST.Tree.new(MST.Store.Memory.new()) 51 iex> tree.root 52 nil 53 54 """ 55 @spec new(store()) :: t() 56 def new(store), do: %__MODULE__{root: nil, store: store} 57 58 @doc """ 59 Returns a tree referencing an existing root node CID in the given store. 60 61 Use this to wrap an already-populated store (e.g. after loading from a CAR 62 file). 63 64 ## Examples 65 66 iex> store = MST.Store.Memory.new() 67 iex> node = MST.Node.empty() 68 iex> {:ok, cid} = MST.Node.cid(node) 69 iex> store = MST.Store.put(store, cid, node) 70 iex> tree = MST.Tree.from_root(cid, store) 71 iex> tree.root == cid 72 true 73 74 """ 75 @spec from_root(CID.t() | nil, store()) :: t() 76 def from_root(root, store), do: %__MODULE__{root: root, store: store} 77 78 # --------------------------------------------------------------------------- 79 # Lookup 80 # --------------------------------------------------------------------------- 81 82 @doc """ 83 Looks up `key` in the tree. 84 85 Returns `{:ok, value_cid}` if found, `{:error, :not_found}` otherwise. 86 87 ## Examples 88 89 iex> store = MST.Store.Memory.new() 90 iex> tree = MST.Tree.new(store) 91 iex> MST.Tree.get(tree, "col/key") 92 {:error, :not_found} 93 94 """ 95 @spec get(t(), binary()) :: {:ok, CID.t()} | {:error, :not_found} | tree_error() 96 def get(%__MODULE__{root: nil}, _key), do: {:error, :not_found} 97 98 def get(%__MODULE__{root: root, store: store}, key) do 99 search(store, root, key) 100 end 101 102 # --------------------------------------------------------------------------- 103 # Mutation 104 # --------------------------------------------------------------------------- 105 106 @doc """ 107 Inserts or updates `key` → `value` in the tree. 108 109 Returns `{:ok, new_tree}` on success. The new tree shares the store with the 110 old tree, but both may be used independently — nodes are append-only. 111 112 ## Examples 113 114 iex> store = MST.Store.Memory.new() 115 iex> tree = MST.Tree.new(store) 116 iex> val = DASL.CID.compute("data") 117 iex> {:ok, tree2} = MST.Tree.put(tree, "col/key", val) 118 iex> MST.Tree.get(tree2, "col/key") 119 {:ok, val} 120 121 """ 122 @spec put(t(), binary(), CID.t()) :: {:ok, t()} | tree_error() 123 def put(%__MODULE__{root: nil, store: store}, key, value) do 124 # Empty tree — create a leaf node. No intermediate layers needed for a 125 # single-key tree (the spec says empty nodes at the top must be pruned). 126 node = leaf_node(key, value) 127 128 with {:ok, root, store} <- write_node(store, node) do 129 {:ok, %__MODULE__{root: root, store: store}} 130 end 131 end 132 133 def put(%__MODULE__{root: root, store: store}, key, value) do 134 with {:ok, root_node} <- fetch_node(store, root) do 135 if root_node.entries == [] and root_node.left == nil do 136 # Empty root from CAR import — treat as fresh empty tree. 137 put(%__MODULE__{root: nil, store: store}, key, value) 138 else 139 key_height = Height.for_key(key) 140 tree_height = require_height(store, root_node) 141 142 with {:ok, new_root, store} <- 143 do_insert(store, root, key, value, key_height, tree_height) do 144 {:ok, %__MODULE__{root: new_root, store: store}} 145 end 146 end 147 end 148 end 149 150 @doc """ 151 Removes `key` from the tree. 152 153 Returns `{:ok, new_tree}` on success, `{:error, :not_found}` if the key 154 does not exist. 155 156 ## Examples 157 158 iex> store = MST.Store.Memory.new() 159 iex> tree = MST.Tree.new(store) 160 iex> val = DASL.CID.compute("data") 161 iex> {:ok, tree2} = MST.Tree.put(tree, "col/key", val) 162 iex> {:ok, tree3} = MST.Tree.delete(tree2, "col/key") 163 iex> MST.Tree.get(tree3, "col/key") 164 {:error, :not_found} 165 166 """ 167 @spec delete(t(), binary()) :: {:ok, t()} | {:error, :not_found} | tree_error() 168 def delete(%__MODULE__{root: nil}, _key), do: {:error, :not_found} 169 170 def delete(%__MODULE__{root: root, store: store}, key) do 171 with {:ok, root_node} <- fetch_node(store, root) do 172 key_height = Height.for_key(key) 173 tree_height = require_height(store, root_node) 174 175 with {:ok, new_root, store} <- 176 do_remove(store, root, key, key_height, tree_height) do 177 # Trim empty wrappers from the top after deletion. 178 {:ok, new_root, store} = trim_top(store, new_root) 179 {:ok, %__MODULE__{root: new_root, store: store}} 180 end 181 end 182 end 183 184 # --------------------------------------------------------------------------- 185 # Traversal 186 # --------------------------------------------------------------------------- 187 188 @doc """ 189 Returns all key-value pairs in the tree as a sorted list. 190 191 ## Examples 192 193 iex> store = MST.Store.Memory.new() 194 iex> tree = MST.Tree.new(store) 195 iex> val = DASL.CID.compute("data") 196 iex> {:ok, tree} = MST.Tree.put(tree, "col/b", val) 197 iex> {:ok, tree} = MST.Tree.put(tree, "col/a", val) 198 iex> {:ok, pairs} = MST.Tree.to_list(tree) 199 iex> Enum.map(pairs, &elem(&1, 0)) 200 ["col/a", "col/b"] 201 202 """ 203 @spec to_list(t()) :: {:ok, [{binary(), CID.t()}]} | tree_error() 204 def to_list(%__MODULE__{root: nil}), do: {:ok, []} 205 206 def to_list(%__MODULE__{root: root, store: store}) do 207 with {:ok, reversed} <- walk(store, root, []) do 208 {:ok, Enum.reverse(reversed)} 209 end 210 end 211 212 @doc """ 213 Returns a lazy stream of `{key, value_cid}` pairs in sorted order. 214 215 The stream reads nodes from the store on demand. Raises on missing nodes 216 (consistent with lazy stream semantics). 217 218 ## Examples 219 220 iex> store = MST.Store.Memory.new() 221 iex> tree = MST.Tree.new(store) 222 iex> val = DASL.CID.compute("x") 223 iex> {:ok, tree} = MST.Tree.put(tree, "col/a", val) 224 iex> tree |> MST.Tree.stream() |> Enum.to_list() 225 [{"col/a", val}] 226 227 """ 228 @spec stream(t()) :: Enumerable.t() 229 def stream(%__MODULE__{root: nil}), do: [] 230 231 def stream(%__MODULE__{root: root, store: store}) do 232 Stream.resource( 233 fn -> [{:visit, root}] end, 234 fn 235 [] -> 236 {:halt, []} 237 238 [{:yield, k, v} | rest] -> 239 {[{k, v}], rest} 240 241 [{:visit, cid} | rest] -> 242 node = fetch_node!(store, cid) 243 full_keys = Node.keys(node) 244 {[], in_order_items(node, full_keys) ++ rest} 245 end, 246 fn _ -> :ok end 247 ) 248 end 249 250 @doc """ 251 Returns the number of key-value pairs in the tree. 252 253 ## Examples 254 255 iex> store = MST.Store.Memory.new() 256 iex> tree = MST.Tree.new(store) 257 iex> {:ok, 0} = MST.Tree.length(tree) 258 iex> val = DASL.CID.compute("x") 259 iex> {:ok, tree} = MST.Tree.put(tree, "col/a", val) 260 iex> MST.Tree.length(tree) 261 {:ok, 1} 262 263 """ 264 @spec length(t()) :: {:ok, non_neg_integer()} | tree_error() 265 def length(tree) do 266 with {:ok, pairs} <- to_list(tree) do 267 {:ok, Kernel.length(pairs)} 268 end 269 end 270 271 # --------------------------------------------------------------------------- 272 # Block collection 273 # --------------------------------------------------------------------------- 274 275 @doc """ 276 Collects all MST nodes reachable from the root into a map of CID → encoded bytes. 277 278 Useful for serialising the tree to a CAR file. 279 280 ## Examples 281 282 iex> store = MST.Store.Memory.new() 283 iex> tree = MST.Tree.new(store) 284 iex> val = DASL.CID.compute("x") 285 iex> {:ok, tree} = MST.Tree.put(tree, "col/a", val) 286 iex> {:ok, blocks} = MST.Tree.collect_blocks(tree) 287 iex> map_size(blocks) >= 1 288 true 289 290 """ 291 @spec collect_blocks(t()) :: {:ok, %{CID.t() => binary()}} | tree_error() 292 def collect_blocks(%__MODULE__{root: nil}), do: {:ok, %{}} 293 294 def collect_blocks(%__MODULE__{root: root, store: store}) do 295 collect_reachable(store, root, %{}) 296 end 297 298 # --------------------------------------------------------------------------- 299 # Private — search 300 # --------------------------------------------------------------------------- 301 302 @spec search(store(), CID.t(), binary()) :: 303 {:ok, CID.t()} | {:error, :not_found} | tree_error() 304 defp search(store, cid, key) do 305 with {:ok, node} <- fetch_node(store, cid) do 306 full_keys = Node.keys(node) 307 search_node(store, node, full_keys, key) 308 end 309 end 310 311 @spec search_node(store(), Node.t(), [binary()], binary()) :: 312 {:ok, CID.t()} | {:error, :not_found} | tree_error() 313 defp search_node(store, node, full_keys, key) do 314 case locate(full_keys, key) do 315 {:found, idx} -> 316 {:ok, Enum.at(node.entries, idx).value} 317 318 {:left} -> 319 descend(store, node.left, key) 320 321 {:right, idx} -> 322 descend(store, Enum.at(node.entries, idx).right, key) 323 end 324 end 325 326 @spec descend(store(), CID.t() | nil, binary()) :: 327 {:ok, CID.t()} | {:error, :not_found} | tree_error() 328 defp descend(_store, nil, _key), do: {:error, :not_found} 329 defp descend(store, cid, key), do: search(store, cid, key) 330 331 # --------------------------------------------------------------------------- 332 # Private — insert 333 # --------------------------------------------------------------------------- 334 335 # Recursive insert into the subtree rooted at `cid`. 336 # `tree_height` is the known height of the node (threaded from the parent). 337 @spec do_insert(store(), CID.t(), binary(), CID.t(), non_neg_integer(), non_neg_integer()) :: 338 {:ok, CID.t(), store()} | tree_error() 339 defp do_insert(store, cid, key, value, key_height, tree_height) do 340 with {:ok, node} <- fetch_node(store, cid) do 341 cond do 342 key_height > tree_height -> 343 # Key belongs at a higher layer. Wrap the current node in an empty 344 # parent and recurse at tree_height + 1. 345 wrapper = %Node{left: cid, entries: []} 346 347 with {:ok, wrapper_cid, store} <- write_node(store, wrapper) do 348 do_insert(store, wrapper_cid, key, value, key_height, tree_height + 1) 349 end 350 351 key_height < tree_height -> 352 # Descend into the appropriate subtree. 353 {kv_pairs, subtrees} = node_to_arrays(node) 354 keys = Enum.map(kv_pairs, &elem(&1, 0)) 355 idx = lower_bound(keys, key) 356 subtree_cid = Enum.at(subtrees, idx) 357 358 with {:ok, new_sub, store} <- 359 insert_into_subtree( 360 store, 361 subtree_cid, 362 key, 363 value, 364 key_height, 365 tree_height - 1 366 ) do 367 new_subtrees = List.replace_at(subtrees, idx, new_sub) 368 write_node(store, arrays_to_node(kv_pairs, new_subtrees)) 369 end 370 371 true -> 372 # key_height == tree_height — insert at this level. 373 put_here(store, node, key, value) 374 end 375 end 376 end 377 378 # Insert into a subtree that may be nil. When nil, creates a new leaf and 379 # wraps it in as many empty intermediate layers as needed. 380 @spec insert_into_subtree( 381 store(), 382 CID.t() | nil, 383 binary(), 384 CID.t(), 385 non_neg_integer(), 386 non_neg_integer() 387 ) :: {:ok, CID.t(), store()} | tree_error() 388 defp insert_into_subtree(store, nil, key, value, key_height, expected_height) do 389 leaf = leaf_node(key, value) 390 391 with {:ok, leaf_cid, store} <- write_node(store, leaf) do 392 wrap_with_empty_layers(store, leaf_cid, expected_height - key_height) 393 end 394 end 395 396 defp insert_into_subtree(store, cid, key, value, key_height, expected_height) do 397 do_insert(store, cid, key, value, key_height, expected_height) 398 end 399 400 # Insert a key at the current level (key_height == tree_height). 401 # Splits the subtree at the insertion point recursively. 402 @spec put_here(store(), Node.t(), binary(), CID.t()) :: 403 {:ok, CID.t(), store()} | tree_error() 404 defp put_here(store, node, key, value) do 405 {kv_pairs, subtrees} = node_to_arrays(node) 406 keys = Enum.map(kv_pairs, &elem(&1, 0)) 407 idx = lower_bound(keys, key) 408 409 if idx < Kernel.length(keys) and Enum.at(keys, idx) == key do 410 # Overwrite existing key. 411 new_kv = List.replace_at(kv_pairs, idx, {key, value}) 412 write_node(store, arrays_to_node(new_kv, subtrees)) 413 else 414 # Split the subtree at the insertion point recursively. 415 with {:ok, lsub, rsub, store} <- split_on_key(store, Enum.at(subtrees, idx), key) do 416 new_kv = List.insert_at(kv_pairs, idx, {key, value}) 417 418 new_subtrees = 419 List.replace_at(subtrees, idx, lsub) |> List.insert_at(idx + 1, rsub) 420 421 write_node(store, arrays_to_node(new_kv, new_subtrees)) 422 end 423 end 424 end 425 426 # Recursively splits the subtree at `key`. Returns {left_cid, right_cid} 427 # where left contains all keys < `key` and right contains all keys >= `key`. 428 # Either side may be nil if empty. 429 @spec split_on_key(store(), CID.t() | nil, binary()) :: 430 {:ok, CID.t() | nil, CID.t() | nil, store()} | tree_error() 431 defp split_on_key(store, nil, _key), do: {:ok, nil, nil, store} 432 433 defp split_on_key(store, cid, key) do 434 with {:ok, node} <- fetch_node(store, cid) do 435 {kv_pairs, subtrees} = node_to_arrays(node) 436 keys = Enum.map(kv_pairs, &elem(&1, 0)) 437 idx = lower_bound(keys, key) 438 439 # Recursively split the subtree at the boundary position. 440 with {:ok, inner_l, inner_r, store} <- 441 split_on_key(store, Enum.at(subtrees, idx), key) do 442 left_kv = Enum.take(kv_pairs, idx) 443 left_subs = Enum.take(subtrees, idx) ++ [inner_l] 444 445 right_kv = Enum.drop(kv_pairs, idx) 446 right_subs = [inner_r | Enum.drop(subtrees, idx + 1)] 447 448 with {:ok, left_cid, store} <- 449 write_node_to_nullable(store, arrays_to_node(left_kv, left_subs)), 450 {:ok, right_cid, store} <- 451 write_node_to_nullable(store, arrays_to_node(right_kv, right_subs)) do 452 {:ok, left_cid, right_cid, store} 453 end 454 end 455 end 456 end 457 458 # --------------------------------------------------------------------------- 459 # Private — delete 460 # --------------------------------------------------------------------------- 461 462 # Recursive delete. `tree_height` is the known height of the node at `cid`. 463 @spec do_remove(store(), CID.t(), binary(), non_neg_integer(), non_neg_integer()) :: 464 {:ok, CID.t() | nil, store()} | {:error, :not_found} | tree_error() 465 defp do_remove(store, cid, key, key_height, tree_height) do 466 with {:ok, node} <- fetch_node(store, cid) do 467 cond do 468 key_height > tree_height -> 469 {:error, :not_found} 470 471 key_height < tree_height -> 472 {kv_pairs, subtrees} = node_to_arrays(node) 473 keys = Enum.map(kv_pairs, &elem(&1, 0)) 474 idx = lower_bound(keys, key) 475 476 case Enum.at(subtrees, idx) do 477 nil -> 478 {:error, :not_found} 479 480 sub_cid -> 481 with {:ok, new_sub, store} <- 482 do_remove(store, sub_cid, key, key_height, tree_height - 1) do 483 new_subtrees = List.replace_at(subtrees, idx, new_sub) 484 write_node_to_nullable(store, arrays_to_node(kv_pairs, new_subtrees)) 485 end 486 end 487 488 true -> 489 # key_height == tree_height — key must be at this level if it exists. 490 {kv_pairs, subtrees} = node_to_arrays(node) 491 keys = Enum.map(kv_pairs, &elem(&1, 0)) 492 idx = lower_bound(keys, key) 493 494 if idx < Kernel.length(keys) and Enum.at(keys, idx) == key do 495 # Found! Merge the adjacent subtrees that flanked the deleted key. 496 with {:ok, merged_sub, store} <- 497 do_merge(store, Enum.at(subtrees, idx), Enum.at(subtrees, idx + 1)) do 498 new_kv = List.delete_at(kv_pairs, idx) 499 500 new_subtrees = 501 Enum.take(subtrees, idx) ++ [merged_sub | Enum.drop(subtrees, idx + 2)] 502 503 write_node_to_nullable(store, arrays_to_node(new_kv, new_subtrees)) 504 end 505 else 506 {:error, :not_found} 507 end 508 end 509 end 510 end 511 512 # Recursively merges two adjacent subtree pointers. The boundary subtrees 513 # (rightmost of left, leftmost of right) are merged recursively. 514 @spec do_merge(store(), CID.t() | nil, CID.t() | nil) :: 515 {:ok, CID.t() | nil, store()} | tree_error() 516 defp do_merge(store, nil, right_cid), do: {:ok, right_cid, store} 517 defp do_merge(store, left_cid, nil), do: {:ok, left_cid, store} 518 519 defp do_merge(store, left_cid, right_cid) do 520 with {:ok, left_node} <- fetch_node(store, left_cid), 521 {:ok, right_node} <- fetch_node(store, right_cid) do 522 {left_kv, left_subs} = node_to_arrays(left_node) 523 {right_kv, right_subs} = node_to_arrays(right_node) 524 525 with {:ok, merged_boundary, store} <- 526 do_merge(store, List.last(left_subs), hd(right_subs)) do 527 new_kv = left_kv ++ right_kv 528 new_subs = Enum.slice(left_subs, 0..-2//1) ++ [merged_boundary | tl(right_subs)] 529 write_node_to_nullable(store, arrays_to_node(new_kv, new_subs)) 530 end 531 end 532 end 533 534 # Strips empty wrapper nodes from the top of the tree. Only called after 535 # a top-level delete — intermediate empty nodes are preserved during 536 # recursive descent. 537 @spec trim_top(store(), CID.t() | nil) :: {:ok, CID.t() | nil, store()} | tree_error() 538 defp trim_top(store, nil), do: {:ok, nil, store} 539 540 defp trim_top(store, cid) do 541 with {:ok, node} <- fetch_node(store, cid) do 542 cond do 543 node.entries != [] -> {:ok, cid, store} 544 node.left == nil -> {:ok, nil, store} 545 true -> trim_top(store, node.left) 546 end 547 end 548 end 549 550 # --------------------------------------------------------------------------- 551 # Private — in-order traversal (to_list) 552 # --------------------------------------------------------------------------- 553 554 @spec walk(store(), CID.t(), [{binary(), CID.t()}]) :: 555 {:ok, [{binary(), CID.t()}]} | tree_error() 556 defp walk(store, cid, acc) do 557 with {:ok, node} <- fetch_node(store, cid) do 558 full_keys = Node.keys(node) 559 walk_node(store, node, full_keys, acc) 560 end 561 end 562 563 @spec walk_node(store(), Node.t(), [binary()], [{binary(), CID.t()}]) :: 564 {:ok, [{binary(), CID.t()}]} | tree_error() 565 defp walk_node(store, node, full_keys, acc) do 566 # Walk in-order: left subtree, then entries interleaved with right subtrees. 567 # We collect in reverse for efficiency, then reverse at the end. 568 with {:ok, acc} <- walk_subtree(store, node.left, acc) do 569 walk_entries(store, node.entries, full_keys, acc) 570 end 571 end 572 573 @spec walk_subtree(store(), CID.t() | nil, [{binary(), CID.t()}]) :: 574 {:ok, [{binary(), CID.t()}]} | tree_error() 575 defp walk_subtree(_store, nil, acc), do: {:ok, acc} 576 defp walk_subtree(store, cid, acc), do: walk(store, cid, acc) 577 578 @spec walk_entries(store(), [Entry.t()], [binary()], [{binary(), CID.t()}]) :: 579 {:ok, [{binary(), CID.t()}]} | tree_error() 580 # Accumulates pairs in reverse order; to_list/1 reverses once at the top. 581 defp walk_entries(_store, [], [], acc), do: {:ok, acc} 582 583 defp walk_entries(store, [entry | rest_e], [key | rest_k], acc) do 584 acc = [{key, entry.value} | acc] 585 586 with {:ok, acc} <- walk_subtree(store, entry.right, acc) do 587 walk_entries(store, rest_e, rest_k, acc) 588 end 589 end 590 591 # --------------------------------------------------------------------------- 592 # Private — stream helpers 593 # --------------------------------------------------------------------------- 594 595 # Expand a node into an in-order sequence of {:visit, cid} and {:yield, k, v} 596 # items. When prepended to a DFS stack, this produces correct left-to-right 597 # traversal: left subtree is visited first, then each entry interleaved with 598 # its right subtree. 599 @spec in_order_items(Node.t(), [binary()]) :: list() 600 defp in_order_items(node, full_keys) do 601 left_items = if node.left, do: [{:visit, node.left}], else: [] 602 603 entry_items = 604 Enum.zip(node.entries, full_keys) 605 |> Enum.flat_map(fn {e, k} -> 606 right_items = if e.right, do: [{:visit, e.right}], else: [] 607 [{:yield, k, e.value} | right_items] 608 end) 609 610 left_items ++ entry_items 611 end 612 613 # --------------------------------------------------------------------------- 614 # Private — block collection 615 # --------------------------------------------------------------------------- 616 617 @spec collect_reachable(store(), CID.t(), %{CID.t() => binary()}) :: 618 {:ok, %{CID.t() => binary()}} | tree_error() 619 defp collect_reachable(store, cid, acc) do 620 if Map.has_key?(acc, cid) do 621 {:ok, acc} 622 else 623 with {:ok, node} <- fetch_node(store, cid), 624 {:ok, bytes} <- Node.encode(node) do 625 acc = Map.put(acc, cid, bytes) 626 collect_children(store, node, acc) 627 else 628 {:error, :not_found} -> {:error, :missing_node} 629 {:error, :encode, reason} -> {:error, reason} 630 end 631 end 632 end 633 634 @spec collect_children(store(), Node.t(), %{CID.t() => binary()}) :: 635 {:ok, %{CID.t() => binary()}} | tree_error() 636 defp collect_children(store, node, acc) do 637 subtrees = 638 if(node.left, do: [node.left], else: []) ++ 639 Enum.flat_map(node.entries, fn e -> if e.right, do: [e.right], else: [] end) 640 641 Enum.reduce_while(subtrees, {:ok, acc}, fn cid, {:ok, acc} -> 642 case collect_reachable(store, cid, acc) do 643 {:ok, acc} -> {:cont, {:ok, acc}} 644 err -> {:halt, err} 645 end 646 end) 647 end 648 649 # --------------------------------------------------------------------------- 650 # Private — node I/O 651 # --------------------------------------------------------------------------- 652 653 @spec fetch_node(store(), CID.t()) :: {:ok, Node.t()} | tree_error() 654 defp fetch_node(store, cid) do 655 case Store.get(store, cid) do 656 {:ok, node} -> {:ok, node} 657 {:error, :not_found} -> {:error, :missing_node} 658 end 659 end 660 661 @spec fetch_node!(store(), CID.t()) :: Node.t() 662 defp fetch_node!(store, cid) do 663 case Store.get(store, cid) do 664 {:ok, node} -> node 665 {:error, :not_found} -> raise "MST node not found: #{CID.encode(cid)}" 666 end 667 end 668 669 @spec write_node(store(), Node.t()) :: {:ok, CID.t(), store()} | tree_error() 670 defp write_node(store, node) do 671 case Node.cid(node) do 672 {:ok, cid} -> {:ok, cid, Store.put(store, cid, node)} 673 {:error, :encode, reason} -> {:error, reason} 674 end 675 end 676 677 # Write a node unless it is truly empty (no entries, no left). Returns nil 678 # for empty leaf-level nodes; preserves empty intermediate nodes that have 679 # a left subtree pointer. 680 @spec write_node_to_nullable(store(), Node.t()) :: 681 {:ok, CID.t() | nil, store()} | tree_error() 682 defp write_node_to_nullable(store, %Node{left: nil, entries: []}), do: {:ok, nil, store} 683 defp write_node_to_nullable(store, node), do: write_node(store, node) 684 685 # Wraps a CID in `n` empty intermediate nodes (left-pointer only). 686 @spec wrap_with_empty_layers(store(), CID.t(), non_neg_integer()) :: 687 {:ok, CID.t(), store()} | tree_error() 688 defp wrap_with_empty_layers(store, cid, 0), do: {:ok, cid, store} 689 690 defp wrap_with_empty_layers(store, cid, n) when n > 0 do 691 wrapper = %Node{left: cid, entries: []} 692 693 with {:ok, wrapper_cid, store} <- write_node(store, wrapper) do 694 wrap_with_empty_layers(store, wrapper_cid, n - 1) 695 end 696 end 697 698 # --------------------------------------------------------------------------- 699 # Private — key position helpers 700 # --------------------------------------------------------------------------- 701 702 # Returns the position of `key` in the sorted `full_keys` list: 703 # {:found, idx} — key is at index idx 704 # {:left} — key < all keys (belongs in left subtree) 705 # {:right, idx} — key > keys[idx] (belongs in right subtree of entry idx) 706 @spec locate([binary()], binary()) :: 707 {:found, non_neg_integer()} | {:left} | {:right, non_neg_integer()} 708 defp locate([], _key), do: {:left} 709 710 defp locate(keys, key) do 711 n = Kernel.length(keys) 712 bin_locate(keys, key, 0, n - 1, n) 713 end 714 715 @spec bin_locate([binary()], binary(), integer(), integer(), non_neg_integer()) :: 716 {:found, non_neg_integer()} | {:left} | {:right, non_neg_integer()} 717 defp bin_locate(_keys, _key, lo, hi, _n) when lo > hi do 718 if lo == 0, do: {:left}, else: {:right, lo - 1} 719 end 720 721 defp bin_locate(keys, key, lo, hi, n) do 722 mid = div(lo + hi, 2) 723 mid_key = Enum.at(keys, mid) 724 725 cond do 726 mid_key == key -> {:found, mid} 727 mid_key < key -> bin_locate(keys, key, mid + 1, hi, n) 728 true -> bin_locate(keys, key, lo, mid - 1, n) 729 end 730 end 731 732 # Returns the index of the first key >= `target`, or `length(keys)` if none. 733 @spec lower_bound([binary()], binary()) :: non_neg_integer() 734 defp lower_bound(keys, target) do 735 Enum.find_index(keys, fn k -> k >= target end) || Kernel.length(keys) 736 end 737 738 # --------------------------------------------------------------------------- 739 # Private — layer inference 740 # --------------------------------------------------------------------------- 741 742 # Infer the MST layer of a non-empty node from its first entry's key. 743 @spec node_layer(Node.t()) :: non_neg_integer() | nil 744 defp node_layer(%Node{entries: []}), do: nil 745 746 defp node_layer(%Node{entries: [first | _]}) do 747 Height.for_key(first.key_suffix) 748 end 749 750 # Compute the height of a node, walking into children if the node has no 751 # entries (empty intermediate nodes). 752 @spec require_height(store(), Node.t()) :: non_neg_integer() 753 defp require_height(store, node) do 754 case node_layer(node) do 755 nil -> 756 if node.left do 757 {:ok, child} = fetch_node(store, node.left) 758 require_height(store, child) + 1 759 else 760 0 761 end 762 763 h -> 764 h 765 end 766 end 767 768 # --------------------------------------------------------------------------- 769 # Private — construction helpers 770 # --------------------------------------------------------------------------- 771 772 @spec leaf_node(binary(), CID.t()) :: Node.t() 773 defp leaf_node(key, value) do 774 %Node{ 775 left: nil, 776 entries: [%Entry{prefix_len: 0, key_suffix: key, value: value, right: nil}] 777 } 778 end 779 780 # --------------------------------------------------------------------------- 781 # Private — node array conversions 782 # --------------------------------------------------------------------------- 783 784 # Converts a node into a parallel-array representation: 785 # {[{key, value}], [subtree_cid | nil]} 786 # where subtrees has length(kv_pairs) + 1. 787 # subtrees[0] = node.left, subtrees[i+1] = entries[i].right. 788 @spec node_to_arrays(Node.t()) :: {[{binary(), CID.t()}], [CID.t() | nil]} 789 defp node_to_arrays(node) do 790 full_keys = Node.keys(node) 791 kv_pairs = Enum.zip(full_keys, Enum.map(node.entries, & &1.value)) 792 subtrees = [node.left | Enum.map(node.entries, & &1.right)] 793 {kv_pairs, subtrees} 794 end 795 796 # Converts the parallel-array representation back to a `Node`. 797 @spec arrays_to_node([{binary(), CID.t()}], [CID.t() | nil]) :: Node.t() 798 defp arrays_to_node(kv_pairs, subtrees) do 799 [left | right_ptrs] = subtrees 800 801 triples = 802 Enum.zip(kv_pairs, right_ptrs) 803 |> Enum.map(fn {{k, v}, r} -> {k, v, r} end) 804 805 entries = Node.compress_entries(triples) 806 %Node{left: left, entries: entries} 807 end 808end