An Elixir implementation of AT Protocol-flavoured Merkle Search Trees (MST)
1defmodule MST.Tree do
2 @moduledoc """
3 An in-memory Merkle Search Tree.
4
5 `MST.Tree` is the primary interface for building and querying MSTs. It pairs
6 a root CID (or `nil` for an empty tree) with a block store that maps CIDs to
7 decoded `MST.Node` structs.
8
9 All mutation operations (`put/3`, `delete/3`) return a new `MST.Tree` —
10 the data structure is persistent/immutable. The underlying store accumulates
11 all written nodes across mutations; stale nodes are not removed (no GC).
12
13 ## Example
14
15 store = MST.Store.Memory.new()
16 tree = MST.Tree.new(store)
17
18 val = DASL.CID.compute("my record data")
19 {:ok, tree} = MST.Tree.put(tree, "collection/record-key", val)
20 {:ok, ^val} = MST.Tree.get(tree, "collection/record-key")
21
22 {:ok, tree} = MST.Tree.delete(tree, "collection/record-key")
23 {:error, :not_found} = MST.Tree.get(tree, "collection/record-key")
24
25 """
26
27 use TypedStruct
28 import Kernel, except: [length: 1]
29
30 alias DASL.CID
31 alias MST.{Height, Node, Node.Entry, Store}
32
33 @type store() :: Store.t()
34 @type tree_error() :: {:error, atom()}
35
36 typedstruct enforce: true do
37 field :root, CID.t() | nil
38 field :store, store()
39 end
40
41 # ---------------------------------------------------------------------------
42 # Construction
43 # ---------------------------------------------------------------------------
44
45 @doc """
46 Returns a new, empty tree backed by the given store.
47
48 ## Examples
49
50 iex> tree = MST.Tree.new(MST.Store.Memory.new())
51 iex> tree.root
52 nil
53
54 """
55 @spec new(store()) :: t()
56 def new(store), do: %__MODULE__{root: nil, store: store}
57
58 @doc """
59 Returns a tree referencing an existing root node CID in the given store.
60
61 Use this to wrap an already-populated store (e.g. after loading from a CAR
62 file).
63
64 ## Examples
65
66 iex> store = MST.Store.Memory.new()
67 iex> node = MST.Node.empty()
68 iex> {:ok, cid} = MST.Node.cid(node)
69 iex> store = MST.Store.put(store, cid, node)
70 iex> tree = MST.Tree.from_root(cid, store)
71 iex> tree.root == cid
72 true
73
74 """
75 @spec from_root(CID.t() | nil, store()) :: t()
76 def from_root(root, store), do: %__MODULE__{root: root, store: store}
77
78 # ---------------------------------------------------------------------------
79 # Lookup
80 # ---------------------------------------------------------------------------
81
82 @doc """
83 Looks up `key` in the tree.
84
85 Returns `{:ok, value_cid}` if found, `{:error, :not_found}` otherwise.
86
87 ## Examples
88
89 iex> store = MST.Store.Memory.new()
90 iex> tree = MST.Tree.new(store)
91 iex> MST.Tree.get(tree, "col/key")
92 {:error, :not_found}
93
94 """
95 @spec get(t(), binary()) :: {:ok, CID.t()} | {:error, :not_found} | tree_error()
96 def get(%__MODULE__{root: nil}, _key), do: {:error, :not_found}
97
98 def get(%__MODULE__{root: root, store: store}, key) do
99 search(store, root, key)
100 end
101
102 # ---------------------------------------------------------------------------
103 # Mutation
104 # ---------------------------------------------------------------------------
105
106 @doc """
107 Inserts or updates `key` → `value` in the tree.
108
109 Returns `{:ok, new_tree}` on success. The new tree shares the store with the
110 old tree, but both may be used independently — nodes are append-only.
111
112 ## Examples
113
114 iex> store = MST.Store.Memory.new()
115 iex> tree = MST.Tree.new(store)
116 iex> val = DASL.CID.compute("data")
117 iex> {:ok, tree2} = MST.Tree.put(tree, "col/key", val)
118 iex> MST.Tree.get(tree2, "col/key")
119 {:ok, val}
120
121 """
122 @spec put(t(), binary(), CID.t()) :: {:ok, t()} | tree_error()
123 def put(%__MODULE__{root: nil, store: store}, key, value) do
124 # Empty tree — create a leaf node. No intermediate layers needed for a
125 # single-key tree (the spec says empty nodes at the top must be pruned).
126 node = leaf_node(key, value)
127
128 with {:ok, root, store} <- write_node(store, node) do
129 {:ok, %__MODULE__{root: root, store: store}}
130 end
131 end
132
133 def put(%__MODULE__{root: root, store: store}, key, value) do
134 with {:ok, root_node} <- fetch_node(store, root) do
135 if root_node.entries == [] and root_node.left == nil do
136 # Empty root from CAR import — treat as fresh empty tree.
137 put(%__MODULE__{root: nil, store: store}, key, value)
138 else
139 key_height = Height.for_key(key)
140 tree_height = require_height(store, root_node)
141
142 with {:ok, new_root, store} <-
143 do_insert(store, root, key, value, key_height, tree_height) do
144 {:ok, %__MODULE__{root: new_root, store: store}}
145 end
146 end
147 end
148 end
149
150 @doc """
151 Removes `key` from the tree.
152
153 Returns `{:ok, new_tree}` on success, `{:error, :not_found}` if the key
154 does not exist.
155
156 ## Examples
157
158 iex> store = MST.Store.Memory.new()
159 iex> tree = MST.Tree.new(store)
160 iex> val = DASL.CID.compute("data")
161 iex> {:ok, tree2} = MST.Tree.put(tree, "col/key", val)
162 iex> {:ok, tree3} = MST.Tree.delete(tree2, "col/key")
163 iex> MST.Tree.get(tree3, "col/key")
164 {:error, :not_found}
165
166 """
167 @spec delete(t(), binary()) :: {:ok, t()} | {:error, :not_found} | tree_error()
168 def delete(%__MODULE__{root: nil}, _key), do: {:error, :not_found}
169
170 def delete(%__MODULE__{root: root, store: store}, key) do
171 with {:ok, root_node} <- fetch_node(store, root) do
172 key_height = Height.for_key(key)
173 tree_height = require_height(store, root_node)
174
175 with {:ok, new_root, store} <-
176 do_remove(store, root, key, key_height, tree_height) do
177 # Trim empty wrappers from the top after deletion.
178 {:ok, new_root, store} = trim_top(store, new_root)
179 {:ok, %__MODULE__{root: new_root, store: store}}
180 end
181 end
182 end
183
184 # ---------------------------------------------------------------------------
185 # Traversal
186 # ---------------------------------------------------------------------------
187
188 @doc """
189 Returns all key-value pairs in the tree as a sorted list.
190
191 ## Examples
192
193 iex> store = MST.Store.Memory.new()
194 iex> tree = MST.Tree.new(store)
195 iex> val = DASL.CID.compute("data")
196 iex> {:ok, tree} = MST.Tree.put(tree, "col/b", val)
197 iex> {:ok, tree} = MST.Tree.put(tree, "col/a", val)
198 iex> {:ok, pairs} = MST.Tree.to_list(tree)
199 iex> Enum.map(pairs, &elem(&1, 0))
200 ["col/a", "col/b"]
201
202 """
203 @spec to_list(t()) :: {:ok, [{binary(), CID.t()}]} | tree_error()
204 def to_list(%__MODULE__{root: nil}), do: {:ok, []}
205
206 def to_list(%__MODULE__{root: root, store: store}) do
207 with {:ok, reversed} <- walk(store, root, []) do
208 {:ok, Enum.reverse(reversed)}
209 end
210 end
211
212 @doc """
213 Returns a lazy stream of `{key, value_cid}` pairs in sorted order.
214
215 The stream reads nodes from the store on demand. Raises on missing nodes
216 (consistent with lazy stream semantics).
217
218 ## Examples
219
220 iex> store = MST.Store.Memory.new()
221 iex> tree = MST.Tree.new(store)
222 iex> val = DASL.CID.compute("x")
223 iex> {:ok, tree} = MST.Tree.put(tree, "col/a", val)
224 iex> tree |> MST.Tree.stream() |> Enum.to_list()
225 [{"col/a", val}]
226
227 """
228 @spec stream(t()) :: Enumerable.t()
229 def stream(%__MODULE__{root: nil}), do: []
230
231 def stream(%__MODULE__{root: root, store: store}) do
232 Stream.resource(
233 fn -> [{:visit, root}] end,
234 fn
235 [] ->
236 {:halt, []}
237
238 [{:yield, k, v} | rest] ->
239 {[{k, v}], rest}
240
241 [{:visit, cid} | rest] ->
242 node = fetch_node!(store, cid)
243 full_keys = Node.keys(node)
244 {[], in_order_items(node, full_keys) ++ rest}
245 end,
246 fn _ -> :ok end
247 )
248 end
249
250 @doc """
251 Returns the number of key-value pairs in the tree.
252
253 ## Examples
254
255 iex> store = MST.Store.Memory.new()
256 iex> tree = MST.Tree.new(store)
257 iex> {:ok, 0} = MST.Tree.length(tree)
258 iex> val = DASL.CID.compute("x")
259 iex> {:ok, tree} = MST.Tree.put(tree, "col/a", val)
260 iex> MST.Tree.length(tree)
261 {:ok, 1}
262
263 """
264 @spec length(t()) :: {:ok, non_neg_integer()} | tree_error()
265 def length(tree) do
266 with {:ok, pairs} <- to_list(tree) do
267 {:ok, Kernel.length(pairs)}
268 end
269 end
270
271 # ---------------------------------------------------------------------------
272 # Block collection
273 # ---------------------------------------------------------------------------
274
275 @doc """
276 Collects all MST nodes reachable from the root into a map of CID → encoded bytes.
277
278 Useful for serialising the tree to a CAR file.
279
280 ## Examples
281
282 iex> store = MST.Store.Memory.new()
283 iex> tree = MST.Tree.new(store)
284 iex> val = DASL.CID.compute("x")
285 iex> {:ok, tree} = MST.Tree.put(tree, "col/a", val)
286 iex> {:ok, blocks} = MST.Tree.collect_blocks(tree)
287 iex> map_size(blocks) >= 1
288 true
289
290 """
291 @spec collect_blocks(t()) :: {:ok, %{CID.t() => binary()}} | tree_error()
292 def collect_blocks(%__MODULE__{root: nil}), do: {:ok, %{}}
293
294 def collect_blocks(%__MODULE__{root: root, store: store}) do
295 collect_reachable(store, root, %{})
296 end
297
298 # ---------------------------------------------------------------------------
299 # Private — search
300 # ---------------------------------------------------------------------------
301
302 @spec search(store(), CID.t(), binary()) ::
303 {:ok, CID.t()} | {:error, :not_found} | tree_error()
304 defp search(store, cid, key) do
305 with {:ok, node} <- fetch_node(store, cid) do
306 full_keys = Node.keys(node)
307 search_node(store, node, full_keys, key)
308 end
309 end
310
311 @spec search_node(store(), Node.t(), [binary()], binary()) ::
312 {:ok, CID.t()} | {:error, :not_found} | tree_error()
313 defp search_node(store, node, full_keys, key) do
314 case locate(full_keys, key) do
315 {:found, idx} ->
316 {:ok, Enum.at(node.entries, idx).value}
317
318 {:left} ->
319 descend(store, node.left, key)
320
321 {:right, idx} ->
322 descend(store, Enum.at(node.entries, idx).right, key)
323 end
324 end
325
326 @spec descend(store(), CID.t() | nil, binary()) ::
327 {:ok, CID.t()} | {:error, :not_found} | tree_error()
328 defp descend(_store, nil, _key), do: {:error, :not_found}
329 defp descend(store, cid, key), do: search(store, cid, key)
330
331 # ---------------------------------------------------------------------------
332 # Private — insert
333 # ---------------------------------------------------------------------------
334
335 # Recursive insert into the subtree rooted at `cid`.
336 # `tree_height` is the known height of the node (threaded from the parent).
337 @spec do_insert(store(), CID.t(), binary(), CID.t(), non_neg_integer(), non_neg_integer()) ::
338 {:ok, CID.t(), store()} | tree_error()
339 defp do_insert(store, cid, key, value, key_height, tree_height) do
340 with {:ok, node} <- fetch_node(store, cid) do
341 cond do
342 key_height > tree_height ->
343 # Key belongs at a higher layer. Wrap the current node in an empty
344 # parent and recurse at tree_height + 1.
345 wrapper = %Node{left: cid, entries: []}
346
347 with {:ok, wrapper_cid, store} <- write_node(store, wrapper) do
348 do_insert(store, wrapper_cid, key, value, key_height, tree_height + 1)
349 end
350
351 key_height < tree_height ->
352 # Descend into the appropriate subtree.
353 {kv_pairs, subtrees} = node_to_arrays(node)
354 keys = Enum.map(kv_pairs, &elem(&1, 0))
355 idx = lower_bound(keys, key)
356 subtree_cid = Enum.at(subtrees, idx)
357
358 with {:ok, new_sub, store} <-
359 insert_into_subtree(
360 store,
361 subtree_cid,
362 key,
363 value,
364 key_height,
365 tree_height - 1
366 ) do
367 new_subtrees = List.replace_at(subtrees, idx, new_sub)
368 write_node(store, arrays_to_node(kv_pairs, new_subtrees))
369 end
370
371 true ->
372 # key_height == tree_height — insert at this level.
373 put_here(store, node, key, value)
374 end
375 end
376 end
377
378 # Insert into a subtree that may be nil. When nil, creates a new leaf and
379 # wraps it in as many empty intermediate layers as needed.
380 @spec insert_into_subtree(
381 store(),
382 CID.t() | nil,
383 binary(),
384 CID.t(),
385 non_neg_integer(),
386 non_neg_integer()
387 ) :: {:ok, CID.t(), store()} | tree_error()
388 defp insert_into_subtree(store, nil, key, value, key_height, expected_height) do
389 leaf = leaf_node(key, value)
390
391 with {:ok, leaf_cid, store} <- write_node(store, leaf) do
392 wrap_with_empty_layers(store, leaf_cid, expected_height - key_height)
393 end
394 end
395
396 defp insert_into_subtree(store, cid, key, value, key_height, expected_height) do
397 do_insert(store, cid, key, value, key_height, expected_height)
398 end
399
400 # Insert a key at the current level (key_height == tree_height).
401 # Splits the subtree at the insertion point recursively.
402 @spec put_here(store(), Node.t(), binary(), CID.t()) ::
403 {:ok, CID.t(), store()} | tree_error()
404 defp put_here(store, node, key, value) do
405 {kv_pairs, subtrees} = node_to_arrays(node)
406 keys = Enum.map(kv_pairs, &elem(&1, 0))
407 idx = lower_bound(keys, key)
408
409 if idx < Kernel.length(keys) and Enum.at(keys, idx) == key do
410 # Overwrite existing key.
411 new_kv = List.replace_at(kv_pairs, idx, {key, value})
412 write_node(store, arrays_to_node(new_kv, subtrees))
413 else
414 # Split the subtree at the insertion point recursively.
415 with {:ok, lsub, rsub, store} <- split_on_key(store, Enum.at(subtrees, idx), key) do
416 new_kv = List.insert_at(kv_pairs, idx, {key, value})
417
418 new_subtrees =
419 List.replace_at(subtrees, idx, lsub) |> List.insert_at(idx + 1, rsub)
420
421 write_node(store, arrays_to_node(new_kv, new_subtrees))
422 end
423 end
424 end
425
426 # Recursively splits the subtree at `key`. Returns {left_cid, right_cid}
427 # where left contains all keys < `key` and right contains all keys >= `key`.
428 # Either side may be nil if empty.
429 @spec split_on_key(store(), CID.t() | nil, binary()) ::
430 {:ok, CID.t() | nil, CID.t() | nil, store()} | tree_error()
431 defp split_on_key(store, nil, _key), do: {:ok, nil, nil, store}
432
433 defp split_on_key(store, cid, key) do
434 with {:ok, node} <- fetch_node(store, cid) do
435 {kv_pairs, subtrees} = node_to_arrays(node)
436 keys = Enum.map(kv_pairs, &elem(&1, 0))
437 idx = lower_bound(keys, key)
438
439 # Recursively split the subtree at the boundary position.
440 with {:ok, inner_l, inner_r, store} <-
441 split_on_key(store, Enum.at(subtrees, idx), key) do
442 left_kv = Enum.take(kv_pairs, idx)
443 left_subs = Enum.take(subtrees, idx) ++ [inner_l]
444
445 right_kv = Enum.drop(kv_pairs, idx)
446 right_subs = [inner_r | Enum.drop(subtrees, idx + 1)]
447
448 with {:ok, left_cid, store} <-
449 write_node_to_nullable(store, arrays_to_node(left_kv, left_subs)),
450 {:ok, right_cid, store} <-
451 write_node_to_nullable(store, arrays_to_node(right_kv, right_subs)) do
452 {:ok, left_cid, right_cid, store}
453 end
454 end
455 end
456 end
457
458 # ---------------------------------------------------------------------------
459 # Private — delete
460 # ---------------------------------------------------------------------------
461
462 # Recursive delete. `tree_height` is the known height of the node at `cid`.
463 @spec do_remove(store(), CID.t(), binary(), non_neg_integer(), non_neg_integer()) ::
464 {:ok, CID.t() | nil, store()} | {:error, :not_found} | tree_error()
465 defp do_remove(store, cid, key, key_height, tree_height) do
466 with {:ok, node} <- fetch_node(store, cid) do
467 cond do
468 key_height > tree_height ->
469 {:error, :not_found}
470
471 key_height < tree_height ->
472 {kv_pairs, subtrees} = node_to_arrays(node)
473 keys = Enum.map(kv_pairs, &elem(&1, 0))
474 idx = lower_bound(keys, key)
475
476 case Enum.at(subtrees, idx) do
477 nil ->
478 {:error, :not_found}
479
480 sub_cid ->
481 with {:ok, new_sub, store} <-
482 do_remove(store, sub_cid, key, key_height, tree_height - 1) do
483 new_subtrees = List.replace_at(subtrees, idx, new_sub)
484 write_node_to_nullable(store, arrays_to_node(kv_pairs, new_subtrees))
485 end
486 end
487
488 true ->
489 # key_height == tree_height — key must be at this level if it exists.
490 {kv_pairs, subtrees} = node_to_arrays(node)
491 keys = Enum.map(kv_pairs, &elem(&1, 0))
492 idx = lower_bound(keys, key)
493
494 if idx < Kernel.length(keys) and Enum.at(keys, idx) == key do
495 # Found! Merge the adjacent subtrees that flanked the deleted key.
496 with {:ok, merged_sub, store} <-
497 do_merge(store, Enum.at(subtrees, idx), Enum.at(subtrees, idx + 1)) do
498 new_kv = List.delete_at(kv_pairs, idx)
499
500 new_subtrees =
501 Enum.take(subtrees, idx) ++ [merged_sub | Enum.drop(subtrees, idx + 2)]
502
503 write_node_to_nullable(store, arrays_to_node(new_kv, new_subtrees))
504 end
505 else
506 {:error, :not_found}
507 end
508 end
509 end
510 end
511
512 # Recursively merges two adjacent subtree pointers. The boundary subtrees
513 # (rightmost of left, leftmost of right) are merged recursively.
514 @spec do_merge(store(), CID.t() | nil, CID.t() | nil) ::
515 {:ok, CID.t() | nil, store()} | tree_error()
516 defp do_merge(store, nil, right_cid), do: {:ok, right_cid, store}
517 defp do_merge(store, left_cid, nil), do: {:ok, left_cid, store}
518
519 defp do_merge(store, left_cid, right_cid) do
520 with {:ok, left_node} <- fetch_node(store, left_cid),
521 {:ok, right_node} <- fetch_node(store, right_cid) do
522 {left_kv, left_subs} = node_to_arrays(left_node)
523 {right_kv, right_subs} = node_to_arrays(right_node)
524
525 with {:ok, merged_boundary, store} <-
526 do_merge(store, List.last(left_subs), hd(right_subs)) do
527 new_kv = left_kv ++ right_kv
528 new_subs = Enum.slice(left_subs, 0..-2//1) ++ [merged_boundary | tl(right_subs)]
529 write_node_to_nullable(store, arrays_to_node(new_kv, new_subs))
530 end
531 end
532 end
533
534 # Strips empty wrapper nodes from the top of the tree. Only called after
535 # a top-level delete — intermediate empty nodes are preserved during
536 # recursive descent.
537 @spec trim_top(store(), CID.t() | nil) :: {:ok, CID.t() | nil, store()} | tree_error()
538 defp trim_top(store, nil), do: {:ok, nil, store}
539
540 defp trim_top(store, cid) do
541 with {:ok, node} <- fetch_node(store, cid) do
542 cond do
543 node.entries != [] -> {:ok, cid, store}
544 node.left == nil -> {:ok, nil, store}
545 true -> trim_top(store, node.left)
546 end
547 end
548 end
549
550 # ---------------------------------------------------------------------------
551 # Private — in-order traversal (to_list)
552 # ---------------------------------------------------------------------------
553
554 @spec walk(store(), CID.t(), [{binary(), CID.t()}]) ::
555 {:ok, [{binary(), CID.t()}]} | tree_error()
556 defp walk(store, cid, acc) do
557 with {:ok, node} <- fetch_node(store, cid) do
558 full_keys = Node.keys(node)
559 walk_node(store, node, full_keys, acc)
560 end
561 end
562
563 @spec walk_node(store(), Node.t(), [binary()], [{binary(), CID.t()}]) ::
564 {:ok, [{binary(), CID.t()}]} | tree_error()
565 defp walk_node(store, node, full_keys, acc) do
566 # Walk in-order: left subtree, then entries interleaved with right subtrees.
567 # We collect in reverse for efficiency, then reverse at the end.
568 with {:ok, acc} <- walk_subtree(store, node.left, acc) do
569 walk_entries(store, node.entries, full_keys, acc)
570 end
571 end
572
573 @spec walk_subtree(store(), CID.t() | nil, [{binary(), CID.t()}]) ::
574 {:ok, [{binary(), CID.t()}]} | tree_error()
575 defp walk_subtree(_store, nil, acc), do: {:ok, acc}
576 defp walk_subtree(store, cid, acc), do: walk(store, cid, acc)
577
578 @spec walk_entries(store(), [Entry.t()], [binary()], [{binary(), CID.t()}]) ::
579 {:ok, [{binary(), CID.t()}]} | tree_error()
580 # Accumulates pairs in reverse order; to_list/1 reverses once at the top.
581 defp walk_entries(_store, [], [], acc), do: {:ok, acc}
582
583 defp walk_entries(store, [entry | rest_e], [key | rest_k], acc) do
584 acc = [{key, entry.value} | acc]
585
586 with {:ok, acc} <- walk_subtree(store, entry.right, acc) do
587 walk_entries(store, rest_e, rest_k, acc)
588 end
589 end
590
591 # ---------------------------------------------------------------------------
592 # Private — stream helpers
593 # ---------------------------------------------------------------------------
594
595 # Expand a node into an in-order sequence of {:visit, cid} and {:yield, k, v}
596 # items. When prepended to a DFS stack, this produces correct left-to-right
597 # traversal: left subtree is visited first, then each entry interleaved with
598 # its right subtree.
599 @spec in_order_items(Node.t(), [binary()]) :: list()
600 defp in_order_items(node, full_keys) do
601 left_items = if node.left, do: [{:visit, node.left}], else: []
602
603 entry_items =
604 Enum.zip(node.entries, full_keys)
605 |> Enum.flat_map(fn {e, k} ->
606 right_items = if e.right, do: [{:visit, e.right}], else: []
607 [{:yield, k, e.value} | right_items]
608 end)
609
610 left_items ++ entry_items
611 end
612
613 # ---------------------------------------------------------------------------
614 # Private — block collection
615 # ---------------------------------------------------------------------------
616
617 @spec collect_reachable(store(), CID.t(), %{CID.t() => binary()}) ::
618 {:ok, %{CID.t() => binary()}} | tree_error()
619 defp collect_reachable(store, cid, acc) do
620 if Map.has_key?(acc, cid) do
621 {:ok, acc}
622 else
623 with {:ok, node} <- fetch_node(store, cid),
624 {:ok, bytes} <- Node.encode(node) do
625 acc = Map.put(acc, cid, bytes)
626 collect_children(store, node, acc)
627 else
628 {:error, :not_found} -> {:error, :missing_node}
629 {:error, :encode, reason} -> {:error, reason}
630 end
631 end
632 end
633
634 @spec collect_children(store(), Node.t(), %{CID.t() => binary()}) ::
635 {:ok, %{CID.t() => binary()}} | tree_error()
636 defp collect_children(store, node, acc) do
637 subtrees =
638 if(node.left, do: [node.left], else: []) ++
639 Enum.flat_map(node.entries, fn e -> if e.right, do: [e.right], else: [] end)
640
641 Enum.reduce_while(subtrees, {:ok, acc}, fn cid, {:ok, acc} ->
642 case collect_reachable(store, cid, acc) do
643 {:ok, acc} -> {:cont, {:ok, acc}}
644 err -> {:halt, err}
645 end
646 end)
647 end
648
649 # ---------------------------------------------------------------------------
650 # Private — node I/O
651 # ---------------------------------------------------------------------------
652
653 @spec fetch_node(store(), CID.t()) :: {:ok, Node.t()} | tree_error()
654 defp fetch_node(store, cid) do
655 case Store.get(store, cid) do
656 {:ok, node} -> {:ok, node}
657 {:error, :not_found} -> {:error, :missing_node}
658 end
659 end
660
661 @spec fetch_node!(store(), CID.t()) :: Node.t()
662 defp fetch_node!(store, cid) do
663 case Store.get(store, cid) do
664 {:ok, node} -> node
665 {:error, :not_found} -> raise "MST node not found: #{CID.encode(cid)}"
666 end
667 end
668
669 @spec write_node(store(), Node.t()) :: {:ok, CID.t(), store()} | tree_error()
670 defp write_node(store, node) do
671 case Node.cid(node) do
672 {:ok, cid} -> {:ok, cid, Store.put(store, cid, node)}
673 {:error, :encode, reason} -> {:error, reason}
674 end
675 end
676
677 # Write a node unless it is truly empty (no entries, no left). Returns nil
678 # for empty leaf-level nodes; preserves empty intermediate nodes that have
679 # a left subtree pointer.
680 @spec write_node_to_nullable(store(), Node.t()) ::
681 {:ok, CID.t() | nil, store()} | tree_error()
682 defp write_node_to_nullable(store, %Node{left: nil, entries: []}), do: {:ok, nil, store}
683 defp write_node_to_nullable(store, node), do: write_node(store, node)
684
685 # Wraps a CID in `n` empty intermediate nodes (left-pointer only).
686 @spec wrap_with_empty_layers(store(), CID.t(), non_neg_integer()) ::
687 {:ok, CID.t(), store()} | tree_error()
688 defp wrap_with_empty_layers(store, cid, 0), do: {:ok, cid, store}
689
690 defp wrap_with_empty_layers(store, cid, n) when n > 0 do
691 wrapper = %Node{left: cid, entries: []}
692
693 with {:ok, wrapper_cid, store} <- write_node(store, wrapper) do
694 wrap_with_empty_layers(store, wrapper_cid, n - 1)
695 end
696 end
697
698 # ---------------------------------------------------------------------------
699 # Private — key position helpers
700 # ---------------------------------------------------------------------------
701
702 # Returns the position of `key` in the sorted `full_keys` list:
703 # {:found, idx} — key is at index idx
704 # {:left} — key < all keys (belongs in left subtree)
705 # {:right, idx} — key > keys[idx] (belongs in right subtree of entry idx)
706 @spec locate([binary()], binary()) ::
707 {:found, non_neg_integer()} | {:left} | {:right, non_neg_integer()}
708 defp locate([], _key), do: {:left}
709
710 defp locate(keys, key) do
711 n = Kernel.length(keys)
712 bin_locate(keys, key, 0, n - 1, n)
713 end
714
715 @spec bin_locate([binary()], binary(), integer(), integer(), non_neg_integer()) ::
716 {:found, non_neg_integer()} | {:left} | {:right, non_neg_integer()}
717 defp bin_locate(_keys, _key, lo, hi, _n) when lo > hi do
718 if lo == 0, do: {:left}, else: {:right, lo - 1}
719 end
720
721 defp bin_locate(keys, key, lo, hi, n) do
722 mid = div(lo + hi, 2)
723 mid_key = Enum.at(keys, mid)
724
725 cond do
726 mid_key == key -> {:found, mid}
727 mid_key < key -> bin_locate(keys, key, mid + 1, hi, n)
728 true -> bin_locate(keys, key, lo, mid - 1, n)
729 end
730 end
731
732 # Returns the index of the first key >= `target`, or `length(keys)` if none.
733 @spec lower_bound([binary()], binary()) :: non_neg_integer()
734 defp lower_bound(keys, target) do
735 Enum.find_index(keys, fn k -> k >= target end) || Kernel.length(keys)
736 end
737
738 # ---------------------------------------------------------------------------
739 # Private — layer inference
740 # ---------------------------------------------------------------------------
741
742 # Infer the MST layer of a non-empty node from its first entry's key.
743 @spec node_layer(Node.t()) :: non_neg_integer() | nil
744 defp node_layer(%Node{entries: []}), do: nil
745
746 defp node_layer(%Node{entries: [first | _]}) do
747 Height.for_key(first.key_suffix)
748 end
749
750 # Compute the height of a node, walking into children if the node has no
751 # entries (empty intermediate nodes).
752 @spec require_height(store(), Node.t()) :: non_neg_integer()
753 defp require_height(store, node) do
754 case node_layer(node) do
755 nil ->
756 if node.left do
757 {:ok, child} = fetch_node(store, node.left)
758 require_height(store, child) + 1
759 else
760 0
761 end
762
763 h ->
764 h
765 end
766 end
767
768 # ---------------------------------------------------------------------------
769 # Private — construction helpers
770 # ---------------------------------------------------------------------------
771
772 @spec leaf_node(binary(), CID.t()) :: Node.t()
773 defp leaf_node(key, value) do
774 %Node{
775 left: nil,
776 entries: [%Entry{prefix_len: 0, key_suffix: key, value: value, right: nil}]
777 }
778 end
779
780 # ---------------------------------------------------------------------------
781 # Private — node array conversions
782 # ---------------------------------------------------------------------------
783
784 # Converts a node into a parallel-array representation:
785 # {[{key, value}], [subtree_cid | nil]}
786 # where subtrees has length(kv_pairs) + 1.
787 # subtrees[0] = node.left, subtrees[i+1] = entries[i].right.
788 @spec node_to_arrays(Node.t()) :: {[{binary(), CID.t()}], [CID.t() | nil]}
789 defp node_to_arrays(node) do
790 full_keys = Node.keys(node)
791 kv_pairs = Enum.zip(full_keys, Enum.map(node.entries, & &1.value))
792 subtrees = [node.left | Enum.map(node.entries, & &1.right)]
793 {kv_pairs, subtrees}
794 end
795
796 # Converts the parallel-array representation back to a `Node`.
797 @spec arrays_to_node([{binary(), CID.t()}], [CID.t() | nil]) :: Node.t()
798 defp arrays_to_node(kv_pairs, subtrees) do
799 [left | right_ptrs] = subtrees
800
801 triples =
802 Enum.zip(kv_pairs, right_ptrs)
803 |> Enum.map(fn {{k, v}, r} -> {k, v, r} end)
804
805 entries = Node.compress_entries(triples)
806 %Node{left: left, entries: entries}
807 end
808end