An Elixir implementation of AT Protocol-flavoured Merkle Search Trees (MST)
1defmodule MST.Diff do
2 @moduledoc """
3 Computes the diff between two `MST.Tree` instances.
4
5 A diff captures:
6
7 - Which MST nodes were **created** (present in `b` but not `a`)
8 - Which MST nodes were **deleted** (present in `a` but not `b`)
9 - The per-key **record operations** (creates, updates, and deletes)
10
11 ## Algorithm
12
13 Node sets (`created_nodes` / `deleted_nodes`) are computed by collecting all
14 reachable node CIDs from each tree root with a DFS, then taking set
15 differences. Equal CIDs short-circuit entire subtrees (no need to recurse
16 into subtrees both trees share).
17
18 Record ops are computed by fully materialising both trees as sorted key/value
19 lists and performing a linear sorted-merge comparison. This is straightforward
20 and correct at the cost of O(n) memory; it is the right tradeoff given that
21 the diff is typically used to inspect the full changeset anyway.
22
23 ## Example
24
25 {:ok, diff} = MST.Diff.compute(tree_a, tree_b)
26 # diff.record_ops is a sorted list of MST.Diff.Op structs
27
28 """
29
30 use TypedStruct
31
32 alias DASL.CID
33 alias MST.{Node, Store, Tree}
34
35 @type diff_error() :: {:error, atom()}
36
37 typedstruct enforce: true do
38 field :created_nodes, MapSet.t(CID.t()), default: MapSet.new()
39 field :deleted_nodes, MapSet.t(CID.t()), default: MapSet.new()
40 field :record_ops, [MST.Diff.Op.t()], default: []
41 end
42
43 # ---------------------------------------------------------------------------
44 # Public API
45 # ---------------------------------------------------------------------------
46
47 @doc """
48 Computes the diff from `tree_a` to `tree_b`.
49
50 Both trees must use stores that have their nodes populated (e.g. loaded from
51 CAR files). Returns `{:ok, diff}` or an error if a node is missing.
52
53 ## Examples
54
55 iex> store = MST.Store.Memory.new()
56 iex> ta = MST.Tree.new(store)
57 iex> val = DASL.CID.compute("v")
58 iex> {:ok, tb} = MST.Tree.put(ta, "col/a", val)
59 iex> {:ok, diff} = MST.Diff.compute(ta, tb)
60 iex> length(diff.record_ops)
61 1
62 iex> hd(diff.record_ops).key
63 "col/a"
64
65 """
66 @spec compute(Tree.t(), Tree.t()) :: {:ok, t()} | diff_error()
67 def compute(%Tree{root: root_a, store: store_a}, %Tree{root: root_b, store: store_b}) do
68 with {:ok, nodes_a} <- reachable_nodes(store_a, root_a),
69 {:ok, nodes_b} <- reachable_nodes(store_b, root_b),
70 {:ok, leaves_a} <- collect_leaves(store_a, root_a),
71 {:ok, leaves_b} <- collect_leaves(store_b, root_b) do
72 ops = merge_ops(leaves_a, leaves_b, [])
73
74 {:ok,
75 %__MODULE__{
76 created_nodes: MapSet.difference(nodes_b, nodes_a),
77 deleted_nodes: MapSet.difference(nodes_a, nodes_b),
78 record_ops: ops
79 }}
80 end
81 end
82
83 # ---------------------------------------------------------------------------
84 # Private — reachable node collection
85 # ---------------------------------------------------------------------------
86
87 @spec reachable_nodes(Store.t(), CID.t() | nil) :: {:ok, MapSet.t(CID.t())} | diff_error()
88 defp reachable_nodes(_store, nil), do: {:ok, MapSet.new()}
89 defp reachable_nodes(store, root), do: collect_nodes(store, root, MapSet.new())
90
91 @spec collect_nodes(Store.t(), CID.t(), MapSet.t(CID.t())) ::
92 {:ok, MapSet.t(CID.t())} | diff_error()
93 defp collect_nodes(store, cid, visited) do
94 if MapSet.member?(visited, cid) do
95 {:ok, visited}
96 else
97 with {:ok, node} <- fetch(store, cid) do
98 visited = MapSet.put(visited, cid)
99
100 Enum.reduce_while(subtree_cids(node), {:ok, visited}, fn sub, {:ok, v} ->
101 case collect_nodes(store, sub, v) do
102 {:ok, v} -> {:cont, {:ok, v}}
103 err -> {:halt, err}
104 end
105 end)
106 end
107 end
108 end
109
110 @spec subtree_cids(Node.t()) :: [CID.t()]
111 defp subtree_cids(node) do
112 left = if node.left, do: [node.left], else: []
113 rights = Enum.flat_map(node.entries, fn e -> if e.right, do: [e.right], else: [] end)
114 left ++ rights
115 end
116
117 # ---------------------------------------------------------------------------
118 # Private — leaf collection (in sorted order)
119 # ---------------------------------------------------------------------------
120
121 @spec collect_leaves(Store.t(), CID.t() | nil) ::
122 {:ok, [{binary(), CID.t()}]} | diff_error()
123 defp collect_leaves(_store, nil), do: {:ok, []}
124
125 defp collect_leaves(store, root) do
126 with {:ok, pairs} <- do_walk(store, root, []) do
127 {:ok, Enum.reverse(pairs)}
128 end
129 end
130
131 # Accumulates pairs in reverse order (prepend for efficiency, reverse at end).
132 @spec do_walk(Store.t(), CID.t(), [{binary(), CID.t()}]) ::
133 {:ok, [{binary(), CID.t()}]} | diff_error()
134 defp do_walk(store, cid, acc) do
135 with {:ok, node} <- fetch(store, cid) do
136 full_keys = Node.keys(node)
137 do_walk_left(store, node, full_keys, acc)
138 end
139 end
140
141 @spec do_walk_left(Store.t(), Node.t(), [binary()], [{binary(), CID.t()}]) ::
142 {:ok, [{binary(), CID.t()}]} | diff_error()
143 defp do_walk_left(store, node, full_keys, acc) do
144 with {:ok, acc} <- maybe_do_walk(store, node.left, acc) do
145 do_walk_entries(store, node.entries, full_keys, acc)
146 end
147 end
148
149 defp maybe_do_walk(_store, nil, acc), do: {:ok, acc}
150 defp maybe_do_walk(store, cid, acc), do: do_walk(store, cid, acc)
151
152 defp do_walk_entries(_store, [], [], acc), do: {:ok, acc}
153
154 defp do_walk_entries(store, [entry | rest_e], [key | rest_k], acc) do
155 acc = [{key, entry.value} | acc]
156
157 with {:ok, acc} <- maybe_do_walk(store, entry.right, acc) do
158 do_walk_entries(store, rest_e, rest_k, acc)
159 end
160 end
161
162 # ---------------------------------------------------------------------------
163 # Private — sorted-merge diff
164 # ---------------------------------------------------------------------------
165
166 @spec merge_ops(
167 [{binary(), CID.t()}],
168 [{binary(), CID.t()}],
169 [MST.Diff.Op.t()]
170 ) :: [MST.Diff.Op.t()]
171 defp merge_ops([], [], ops), do: Enum.reverse(ops)
172
173 defp merge_ops([], [{kb, vb} | rest_b], ops) do
174 op = %MST.Diff.Op{key: kb, old_value: nil, new_value: vb}
175 merge_ops([], rest_b, [op | ops])
176 end
177
178 defp merge_ops([{ka, va} | rest_a], [], ops) do
179 op = %MST.Diff.Op{key: ka, old_value: va, new_value: nil}
180 merge_ops(rest_a, [], [op | ops])
181 end
182
183 defp merge_ops([{ka, va} | rest_a], [{kb, vb} | rest_b], ops) do
184 cond do
185 ka == kb ->
186 new_ops =
187 if va == vb,
188 do: ops,
189 else: [%MST.Diff.Op{key: ka, old_value: va, new_value: vb} | ops]
190
191 merge_ops(rest_a, rest_b, new_ops)
192
193 ka < kb ->
194 op = %MST.Diff.Op{key: ka, old_value: va, new_value: nil}
195 merge_ops(rest_a, [{kb, vb} | rest_b], [op | ops])
196
197 true ->
198 op = %MST.Diff.Op{key: kb, old_value: nil, new_value: vb}
199 merge_ops([{ka, va} | rest_a], rest_b, [op | ops])
200 end
201 end
202
203 # ---------------------------------------------------------------------------
204 # Private — store access
205 # ---------------------------------------------------------------------------
206
207 @spec fetch(Store.t(), CID.t()) :: {:ok, Node.t()} | diff_error()
208 defp fetch(store, cid) do
209 case Store.get(store, cid) do
210 {:ok, node} -> {:ok, node}
211 {:error, :not_found} -> {:error, :missing_node}
212 end
213 end
214end