Persistent store with Git semantics: lazy reads, delayed writes, content-addressing
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

irmin: Implement Store.diff for tree comparison

Implements the previously stubbed diff function that compares two trees
and yields a sequence of changes (Add, Remove, Change).

The algorithm recursively traverses both trees and:
- Emits Remove for entries only in old tree
- Emits Add for entries only in new tree
- Emits Change for modified contents
- Handles subtree transitions (contents ↔ node)

+145 -3
+109 -3
lib/store.ml
(* ··· lib/store.ml — earlier lines of this type definition are not shown ··· *)
    | `Remove of Tree.path
    | `Change of Tree.path * hash * hash ]

  (* [diff t ~old ~new_] reads the trees stored under the hashes [old] and
     [new_] and returns the sequence of differences between them.

     Only contents (leaves) are reported, each with its full path from the
     root:
     - [`Remove path]: contents present in the old tree only;
     - [`Add (path, h)]: contents present in the new tree only, [h] being
       the hash of the new contents;
     - [`Change (path, old_h, new_h)]: contents present on both sides whose
       hashes differ.

     A subtree that exists on one side only produces one event per leaf
     below it and no event for the subtree itself. When a path switches
     between contents and subtree, removals are emitted before additions so
     the events can be replayed in order.

     Both root trees are read immediately; subtrees are visited as the
     resulting sequence is consumed. *)
  let diff t ~old ~new_ =
    let old_root = read_tree t old in
    let new_root = read_tree t new_ in

    let rec diff_trees prefix old_tree new_tree =
      let old_entries = Tree.list old_tree [] in
      let new_entries = Tree.list new_tree [] in

      (* A one-sided subtree is expanded by diffing it against an empty
         tree: every leaf below it then shows up as removed (or added). *)
      let leaves_removed path = function
        | Some sub -> diff_trees path sub (Tree.empty ())
        | None -> Seq.empty
      in
      let leaves_added path = function
        | Some sub -> diff_trees path (Tree.empty ()) sub
        | None -> Seq.empty
      in
      let contents_added path name =
        match Tree.find new_tree [ name ] with
        | Some c -> Seq.return (`Add (path, F.hash_contents c))
        | None -> Seq.empty
      in

      (* Entries only in old: a [`Remove] per leaf, never for a directory. *)
      let removed =
        old_entries |> List.to_seq
        |> Seq.flat_map (fun (name, kind) ->
               if List.mem_assoc name new_entries then Seq.empty
               else
                 let path = prefix @ [ name ] in
                 match kind with
                 | `Contents -> Seq.return (`Remove path)
                 | `Node ->
                     leaves_removed path (Tree.find_tree old_tree [ name ]))
      in

      (* Entries only in new: an [`Add] per leaf. *)
      let added =
        new_entries |> List.to_seq
        |> Seq.flat_map (fun (name, kind) ->
               if List.mem_assoc name old_entries then Seq.empty
               else
                 let path = prefix @ [ name ] in
                 match kind with
                 | `Contents -> contents_added path name
                 | `Node -> leaves_added path (Tree.find_tree new_tree [ name ]))
      in

      (* Entries on both sides: compare according to the kind on each side. *)
      let changed =
        old_entries |> List.to_seq
        |> Seq.flat_map (fun (name, old_kind) ->
               match List.assoc_opt name new_entries with
               | None -> Seq.empty
               | Some new_kind -> (
                   let path = prefix @ [ name ] in
                   match (old_kind, new_kind) with
                   | `Contents, `Contents -> (
                       match
                         ( Tree.find old_tree [ name ],
                           Tree.find new_tree [ name ] )
                       with
                       | Some old_c, Some new_c ->
                           let old_h = F.hash_contents old_c in
                           let new_h = F.hash_contents new_c in
                           if F.hash_equal old_h new_h then Seq.empty
                           else Seq.return (`Change (path, old_h, new_h))
                       | None, _ | _, None -> Seq.empty)
                   | `Node, `Node -> (
                       match
                         ( Tree.find_tree old_tree [ name ],
                           Tree.find_tree new_tree [ name ] )
                       with
                       | Some old_sub, Some new_sub ->
                           diff_trees path old_sub new_sub
                       | None, _ | _, None -> Seq.empty)
                   | `Contents, `Node ->
                       (* Contents replaced by a subtree: drop the old
                          contents first, then add the new leaves. *)
                       Seq.append
                         (Seq.return (`Remove path))
                         (leaves_added path (Tree.find_tree new_tree [ name ]))
                   | `Node, `Contents ->
                       (* Subtree replaced by contents: drop the old leaves
                          first, then add the new contents. *)
                       Seq.append
                         (leaves_removed path
                            (Tree.find_tree old_tree [ name ]))
                         (contents_added path name)))
      in

      Seq.append removed (Seq.append added changed)
    in
    diff_trees [] old_root new_root
end

module Git = Make (Codec.Git)
+36
test/test_irmin.ml
(* ··· test/test_irmin.ml — earlier lines of this list are not shown ··· *)
    Alcotest.test_case "backend test_and_set" `Quick test_backend_test_and_set;
  ]

(* Diffs two flat trees and checks that the modification, the removal and the
   addition are each reported, and that nothing else is. *)
let test_store_diff () =
  let backend = Backend.Memory.create_sha1 () in
  let store = Store.Git.create ~backend in
  (* Old tree: two files at the root. *)
  let tree1 = Tree.Git.empty () in
  let tree1 = Tree.Git.add tree1 [ "file1.txt" ] "content1" in
  let tree1 = Tree.Git.add tree1 [ "file2.txt" ] "content2" in
  let hash1 = Tree.Git.hash tree1 ~backend in
  (* New tree: file1 modified, file2 gone, file3 new. *)
  let tree2 = Tree.Git.empty () in
  let tree2 = Tree.Git.add tree2 [ "file1.txt" ] "modified1" in
  let tree2 = Tree.Git.add tree2 [ "file3.txt" ] "content3" in
  let hash2 = Tree.Git.hash tree2 ~backend in
  let changes = Store.Git.diff store ~old:hash1 ~new_:hash2 |> List.of_seq in
  let reported matches = List.exists matches changes in
  Alcotest.(check bool)
    "file2 removed" true
    (reported (function `Remove [ "file2.txt" ] -> true | _ -> false));
  Alcotest.(check bool)
    "file3 added" true
    (reported (function `Add ([ "file3.txt" ], _) -> true | _ -> false));
  Alcotest.(check bool)
    "file1 changed" true
    (reported (function `Change ([ "file1.txt" ], _, _) -> true | _ -> false));
  (* Presence checks alone would let duplicated or spurious events through. *)
  Alcotest.(check int) "exactly three changes" 3 (List.length changes)

let store_tests =
  [
    Alcotest.test_case "store commit" `Quick test_store_commit;
    Alcotest.test_case "store branches" `Quick test_store_branches;
    Alcotest.test_case "store diff" `Quick test_store_diff;
  ]

let tree_format_tests =