An Elixir implementation of AT Protocol-flavoured Merkle Search Trees (MST)
at main 214 lines 7.3 kB view raw
1defmodule MST.Diff do 2 @moduledoc """ 3 Computes the diff between two `MST.Tree` instances. 4 5 A diff captures: 6 7 - Which MST nodes were **created** (present in `b` but not `a`) 8 - Which MST nodes were **deleted** (present in `a` but not `b`) 9 - The per-key **record operations** (creates, updates, and deletes) 10 11 ## Algorithm 12 13 Node sets (`created_nodes` / `deleted_nodes`) are computed by collecting all 14 reachable node CIDs from each tree root with a DFS, then taking set 15 differences. Equal CIDs short-circuit entire subtrees (no need to recurse 16 into subtrees both trees share). 17 18 Record ops are computed by fully materialising both trees as sorted key/value 19 lists and performing a linear sorted-merge comparison. This is straightforward 20 and correct at the cost of O(n) memory; it is the right tradeoff given that 21 the diff is typically used to inspect the full changeset anyway. 22 23 ## Example 24 25 {:ok, diff} = MST.Diff.compute(tree_a, tree_b) 26 # diff.record_ops is a sorted list of MST.Diff.Op structs 27 28 """ 29 30 use TypedStruct 31 32 alias DASL.CID 33 alias MST.{Node, Store, Tree} 34 35 @type diff_error() :: {:error, atom()} 36 37 typedstruct enforce: true do 38 field :created_nodes, MapSet.t(CID.t()), default: MapSet.new() 39 field :deleted_nodes, MapSet.t(CID.t()), default: MapSet.new() 40 field :record_ops, [MST.Diff.Op.t()], default: [] 41 end 42 43 # --------------------------------------------------------------------------- 44 # Public API 45 # --------------------------------------------------------------------------- 46 47 @doc """ 48 Computes the diff from `tree_a` to `tree_b`. 49 50 Both trees must use stores that have their nodes populated (e.g. loaded from 51 CAR files). Returns `{:ok, diff}` or an error if a node is missing. 52 53 ## Examples 54 55 iex> store = MST.Store.Memory.new() 56 iex> ta = MST.Tree.new(store) 57 iex> val = DASL.CID.compute("v") 58 iex> {:ok, tb} = MST.Tree.put(ta, "col/a", val) 59 iex> {:ok, diff} = MST.Diff.compute(ta, tb) 60 iex> length(diff.record_ops) 61 1 62 iex> hd(diff.record_ops).key 63 "col/a" 64 65 """ 66 @spec compute(Tree.t(), Tree.t()) :: {:ok, t()} | diff_error() 67 def compute(%Tree{root: root_a, store: store_a}, %Tree{root: root_b, store: store_b}) do 68 with {:ok, nodes_a} <- reachable_nodes(store_a, root_a), 69 {:ok, nodes_b} <- reachable_nodes(store_b, root_b), 70 {:ok, leaves_a} <- collect_leaves(store_a, root_a), 71 {:ok, leaves_b} <- collect_leaves(store_b, root_b) do 72 ops = merge_ops(leaves_a, leaves_b, []) 73 74 {:ok, 75 %__MODULE__{ 76 created_nodes: MapSet.difference(nodes_b, nodes_a), 77 deleted_nodes: MapSet.difference(nodes_a, nodes_b), 78 record_ops: ops 79 }} 80 end 81 end 82 83 # --------------------------------------------------------------------------- 84 # Private — reachable node collection 85 # --------------------------------------------------------------------------- 86 87 @spec reachable_nodes(Store.t(), CID.t() | nil) :: {:ok, MapSet.t(CID.t())} | diff_error() 88 defp reachable_nodes(_store, nil), do: {:ok, MapSet.new()} 89 defp reachable_nodes(store, root), do: collect_nodes(store, root, MapSet.new()) 90 91 @spec collect_nodes(Store.t(), CID.t(), MapSet.t(CID.t())) :: 92 {:ok, MapSet.t(CID.t())} | diff_error() 93 defp collect_nodes(store, cid, visited) do 94 if MapSet.member?(visited, cid) do 95 {:ok, visited} 96 else 97 with {:ok, node} <- fetch(store, cid) do 98 visited = MapSet.put(visited, cid) 99 100 Enum.reduce_while(subtree_cids(node), {:ok, visited}, fn sub, {:ok, v} -> 101 case collect_nodes(store, sub, v) do 102 {:ok, v} -> {:cont, {:ok, v}} 103 err -> {:halt, err} 104 end 105 end) 106 end 107 end 108 end 109 110 @spec subtree_cids(Node.t()) :: [CID.t()] 111 defp subtree_cids(node) do 112 left = if node.left, do: [node.left], else: [] 113 rights = Enum.flat_map(node.entries, fn e -> if e.right, do: [e.right], else: [] end) 114 left ++ rights 115 end 116 117 # --------------------------------------------------------------------------- 118 # Private — leaf collection (in sorted order) 119 # --------------------------------------------------------------------------- 120 121 @spec collect_leaves(Store.t(), CID.t() | nil) :: 122 {:ok, [{binary(), CID.t()}]} | diff_error() 123 defp collect_leaves(_store, nil), do: {:ok, []} 124 125 defp collect_leaves(store, root) do 126 with {:ok, pairs} <- do_walk(store, root, []) do 127 {:ok, Enum.reverse(pairs)} 128 end 129 end 130 131 # Accumulates pairs in reverse order (prepend for efficiency, reverse at end). 132 @spec do_walk(Store.t(), CID.t(), [{binary(), CID.t()}]) :: 133 {:ok, [{binary(), CID.t()}]} | diff_error() 134 defp do_walk(store, cid, acc) do 135 with {:ok, node} <- fetch(store, cid) do 136 full_keys = Node.keys(node) 137 do_walk_left(store, node, full_keys, acc) 138 end 139 end 140 141 @spec do_walk_left(Store.t(), Node.t(), [binary()], [{binary(), CID.t()}]) :: 142 {:ok, [{binary(), CID.t()}]} | diff_error() 143 defp do_walk_left(store, node, full_keys, acc) do 144 with {:ok, acc} <- maybe_do_walk(store, node.left, acc) do 145 do_walk_entries(store, node.entries, full_keys, acc) 146 end 147 end 148 149 defp maybe_do_walk(_store, nil, acc), do: {:ok, acc} 150 defp maybe_do_walk(store, cid, acc), do: do_walk(store, cid, acc) 151 152 defp do_walk_entries(_store, [], [], acc), do: {:ok, acc} 153 154 defp do_walk_entries(store, [entry | rest_e], [key | rest_k], acc) do 155 acc = [{key, entry.value} | acc] 156 157 with {:ok, acc} <- maybe_do_walk(store, entry.right, acc) do 158 do_walk_entries(store, rest_e, rest_k, acc) 159 end 160 end 161 162 # --------------------------------------------------------------------------- 163 # Private — sorted-merge diff 164 # --------------------------------------------------------------------------- 165 166 @spec merge_ops( 167 [{binary(), CID.t()}], 168 [{binary(), CID.t()}], 169 [MST.Diff.Op.t()] 170 ) :: [MST.Diff.Op.t()] 171 defp merge_ops([], [], ops), do: Enum.reverse(ops) 172 173 defp merge_ops([], [{kb, vb} | rest_b], ops) do 174 op = %MST.Diff.Op{key: kb, old_value: nil, new_value: vb} 175 merge_ops([], rest_b, [op | ops]) 176 end 177 178 defp merge_ops([{ka, va} | rest_a], [], ops) do 179 op = %MST.Diff.Op{key: ka, old_value: va, new_value: nil} 180 merge_ops(rest_a, [], [op | ops]) 181 end 182 183 defp merge_ops([{ka, va} | rest_a], [{kb, vb} | rest_b], ops) do 184 cond do 185 ka == kb -> 186 new_ops = 187 if va == vb, 188 do: ops, 189 else: [%MST.Diff.Op{key: ka, old_value: va, new_value: vb} | ops] 190 191 merge_ops(rest_a, rest_b, new_ops) 192 193 ka < kb -> 194 op = %MST.Diff.Op{key: ka, old_value: va, new_value: nil} 195 merge_ops(rest_a, [{kb, vb} | rest_b], [op | ops]) 196 197 true -> 198 op = %MST.Diff.Op{key: kb, old_value: nil, new_value: vb} 199 merge_ops([{ka, va} | rest_a], rest_b, [op | ops]) 200 end 201 end 202 203 # --------------------------------------------------------------------------- 204 # Private — store access 205 # --------------------------------------------------------------------------- 206 207 @spec fetch(Store.t(), CID.t()) :: {:ok, Node.t()} | diff_error() 208 defp fetch(store, cid) do 209 case Store.get(store, cid) do 210 {:ok, node} -> {:ok, node} 211 {:error, :not_found} -> {:error, :missing_node} 212 end 213 end 214end