objective categorical abstract machine language personal data server

mist: Don't collect all leaves to add above root when not needed

futur.blue c7a2bdb3 b0fa75dc

verified
Changed files
+110 -36
mist
lib
test
+70 -35
mist/lib/mst.ml
··· 235 235 236 236 val leaves_of_root : t -> (string * Cid.t) list Lwt.t 237 237 238 + val get_min_key : t -> Cid.t -> string option Lwt.t 239 + 240 + val get_max_key : t -> Cid.t -> string option Lwt.t 241 + 238 242 val equal : t -> t -> bool Lwt.t 239 243 end 240 244 ··· 424 428 (r, full_key) :: acc 425 429 | None -> 426 430 acc ) 427 - ( match raw.l with 428 - | Some l -> 429 - [(l, prefix)] 430 - | None -> 431 - [] ) 431 + (match raw.l with Some l -> [(l, prefix)] | None -> []) 432 432 raw.e 433 433 in 434 434 (Cid.Set.add cid visited, List.rev_append next_pairs queue) ) ··· 496 496 (List.rev acc, seen) 497 497 | `Node cid :: rest -> 498 498 if 499 - Cid.Set.mem cid missing 500 - || Block_map.has cid cache 499 + Cid.Set.mem cid missing || Block_map.has cid cache 501 500 || Cid.Set.mem cid seen 502 501 then collect acc seen remaining rest 503 502 else ··· 512 511 let cache' = 513 512 List.fold_left 514 513 (fun acc (cid, bytes) -> Block_map.set cid bytes acc) 515 - cache (Block_map.entries bm.blocks) 514 + cache 515 + (Block_map.entries bm.blocks) 516 516 in 517 517 let missing' = 518 - List.fold_left 519 - (fun acc cid -> Cid.Set.add cid acc) 520 - missing bm.missing 518 + List.fold_left (fun acc cid -> Cid.Set.add cid acc) missing bm.missing 521 519 in 522 520 Lwt.return (cache', missing') 523 521 in ··· 527 525 Lwt.return_none 528 526 | `Leaf cid :: rest -> 529 527 Lwt.return_some ((Leaf cid : ordered_item), (rest, cache, missing)) 530 - | `Node cid :: rest -> 528 + | `Node cid :: rest -> ( 531 529 if Cid.Set.mem cid missing then step (rest, cache, missing) 532 530 else 533 - ( match Block_map.get cid cache with 531 + match Block_map.get cid cache with 534 532 | None -> 535 533 let%lwt cache', missing' = prefetch queue cache missing in 536 534 if cache' == cache && Cid.Set.mem cid missing' then ··· 554 552 let new_queue = left_queue @ entries_queue @ rest in 555 553 let cache' = Block_map.remove cid cache in 556 554 Lwt.return_some 557 - ((Node (cid, bytes) : ordered_item), (new_queue, cache', missing)) 558 - ) 555 + ( (Node (cid, bytes) : ordered_item) 556 + , (new_queue, cache', missing) ) ) 559 557 in 560 558 Lwt_seq.unfold_lwt step ([`Node t.root], Block_map.empty, Cid.Set.empty) 561 559 ··· 565 563 let entries = 566 564 if entries_are_sorted node.entries then node.entries 567 565 else 568 - List.sort (fun (a : entry) b -> String.compare a.key b.key) 566 + List.sort 567 + (fun (a : entry) b -> String.compare a.key b.key) 569 568 node.entries 570 569 in 571 570 let%lwt left = ··· 612 611 | Error e -> 613 612 raise e 614 613 in 615 - try%lwt Lwt.map Result.ok (aux node) 616 - with e -> Lwt.return_error e 614 + try%lwt Lwt.map Result.ok (aux node) with e -> Lwt.return_error e 617 615 618 616 (* raw-node helpers for covering proofs: operate on stored bytes, not re-serialization *) 619 617 type interleaved_entry = ··· 769 767 let missing = ref Cid.Set.empty in 770 768 let acc = ref Block_map.empty in 771 769 let add_block cid bytes = 772 - if not (Block_map.has cid !acc) then 773 - acc := Block_map.set cid bytes !acc 770 + if not (Block_map.has cid !acc) then acc := Block_map.set cid bytes !acc 774 771 in 775 772 let get_bytes_cached cid = 776 773 match Block_map.get cid !cache with ··· 792 789 | None -> 793 790 Lwt.return_unit 794 791 | Some leaf_cid -> ( 795 - match%lwt get_bytes_cached leaf_cid with 796 - | Some bytes -> 797 - add_block leaf_cid bytes ; 798 - Lwt.return_unit 799 - | None -> 800 - Lwt.return_unit ) 792 + match%lwt get_bytes_cached leaf_cid with 793 + | Some bytes -> 794 + add_block leaf_cid bytes ; Lwt.return_unit 795 + | None -> 796 + Lwt.return_unit ) 801 797 in 802 798 let rec proof_for_key_cached cid key = 803 799 match%lwt get_bytes_cached cid with 804 800 | None -> 805 801 Lwt.return_unit 806 - | Some bytes -> 802 + | Some bytes -> ( 807 803 add_block cid bytes ; 808 804 let raw = decode_block_raw bytes in 809 805 let keys = node_entry_keys raw in 810 806 let seq = interleave_raw raw keys in 811 807 let index = find_gte_leaf_index key seq in 812 - ( match List.nth_opt seq index with 808 + match List.nth_opt seq index with 813 809 | Some (Leaf (k, _, _)) when k = key -> 814 810 Lwt.return_unit 815 811 | Some (Leaf (_k, v_right, _)) -> ( ··· 896 892 , (cid, bytes) :: nodes 897 893 , leaves' 898 894 , List.rev_append next_cids queue ) ) 899 - (visited, nodes, leaves, rest) batch 895 + (visited, nodes, leaves, rest) 896 + batch 900 897 in 901 898 loop next_queue visited' nodes' leaves' 902 899 in ··· 1212 1209 | None -> 1213 1210 Lwt.return [] 1214 1211 1212 + (* returns the minimum key in a subtree by following the leftmost path *) 1213 + let rec get_min_key (t : t) (cid : Cid.t) : string option Lwt.t = 1214 + match%lwt retrieve_node_raw t cid with 1215 + | None -> 1216 + Lwt.return_none 1217 + | Some raw -> ( 1218 + match raw.l with 1219 + | Some left_cid -> 1220 + get_min_key t left_cid 1221 + | None -> ( 1222 + match raw.e with 1223 + | [] -> 1224 + Lwt.return_none 1225 + | first :: _ -> 1226 + Lwt.return_some (Bytes.to_string first.k) ) ) 1227 + 1228 + (* returns the maximum key in a subtree by following the rightmost path *) 1229 + let rec get_max_key (t : t) (cid : Cid.t) : string option Lwt.t = 1230 + match%lwt retrieve_node_raw t cid with 1231 + | None -> 1232 + Lwt.return_none 1233 + | Some raw -> ( 1234 + let keys = decompress_keys raw in 1235 + match List.rev (List.combine keys raw.e) with 1236 + | [] -> ( 1237 + match raw.l with 1238 + | Some left_cid -> 1239 + get_max_key t left_cid 1240 + | None -> 1241 + Lwt.return_none ) 1242 + | (last_key, last_entry) :: _ -> ( 1243 + match last_entry.t with 1244 + | Some right_cid -> 1245 + get_max_key t right_cid 1246 + | None -> 1247 + Lwt.return_some last_key ) ) 1248 + 1215 1249 (* rebuild a subtree from leaves 1216 1250 returns (root_cid option, actual_layer) *) 1217 1251 let rebuild_subtree (blockstore : bs) (leaves : (string * Cid.t) list) : ··· 1265 1299 let%lwt wrapped_old = 1266 1300 wrap_to_layer t.blockstore old_root_cid old_root_layer (key_layer - 1) 1267 1301 in 1268 - (* get all keys from old tree to determine position *) 1269 - let%lwt old_leaves = collect_subtree_leaves t old_root_cid in 1270 - let old_keys = List.map fst old_leaves in 1271 - let all_less = List.for_all (fun k -> k < key) old_keys in 1272 - let all_greater = List.for_all (fun k -> k > key) old_keys in 1302 + (* check boundary keys to determine position *) 1303 + let%lwt min_key = get_min_key t old_root_cid in 1304 + let%lwt max_key = get_max_key t old_root_cid in 1305 + let all_less = match max_key with Some mx -> mx < key | None -> true in 1306 + let all_greater = match min_key with Some mn -> mn > key | None -> true in 1273 1307 if all_less then 1274 1308 (* all old keys < new key: old tree is left, new entry has no right *) 1275 1309 let entries = compress_entries [(key, value, None)] in ··· 1279 1313 let entries = compress_entries [(key, value, Some wrapped_old)] in 1280 1314 persist_node_raw t.blockstore {l= None; e= entries} 1281 1315 else 1282 - (* key is in the middle: need to split *) 1316 + (* key is in the middle: need to split; collect all leaves *) 1317 + let%lwt old_leaves = collect_subtree_leaves t old_root_cid in 1283 1318 let left_leaves = List.filter (fun (k, _) -> k < key) old_leaves in 1284 1319 let right_leaves = List.filter (fun (k, _) -> k > key) old_leaves in 1285 1320 let%lwt left_cid, left_layer = rebuild_subtree t.blockstore left_leaves in
+40 -1
mist/test/test_mst.ml
··· 860 860 Alcotest.fail "key should exist after update" ) ; 861 861 Lwt.return_ok () 862 862 863 + let test_get_min_max_keys () = 864 + let store = Storage.Memory_blockstore.create () in 865 + let cid1 = 866 + cid_of_string_exn 867 + "bafyreie5cvv4h45feadgeuwhbcutmh6t2ceseocckahdoe6uat64zmz454" 868 + in 869 + let* mst = Mem_mst.create_empty store in 870 + (* empty tree *) 871 + let%lwt min_empty = Mem_mst.get_min_key mst mst.root in 872 + let%lwt max_empty = Mem_mst.get_max_key mst mst.root in 873 + Alcotest.(check (option string)) "empty min" None min_empty ; 874 + Alcotest.(check (option string)) "empty max" None max_empty ; 875 + (* single entry *) 876 + let%lwt mst = Mem_mst.add mst "com.example/mmm" cid1 in 877 + let%lwt min_single = Mem_mst.get_min_key mst mst.root in 878 + let%lwt max_single = Mem_mst.get_max_key mst mst.root in 879 + Alcotest.(check (option string)) "single min" (Some "com.example/mmm") min_single ; 880 + Alcotest.(check (option string)) "single max" (Some "com.example/mmm") max_single ; 881 + (* multiple entries at different layers *) 882 + let%lwt mst = Mem_mst.add mst "com.example/aaa" cid1 in 883 + let%lwt mst = Mem_mst.add mst "com.example/zzz" cid1 in 884 + let%lwt mst = Mem_mst.add mst "com.example/bbb" cid1 in 885 + let%lwt mst = Mem_mst.add mst "com.example/yyy" cid1 in 886 + let%lwt min_key = Mem_mst.get_min_key mst mst.root in 887 + let%lwt max_key = Mem_mst.get_max_key mst mst.root in 888 + Alcotest.(check (option string)) "multi min" (Some "com.example/aaa") min_key ; 889 + Alcotest.(check (option string)) "multi max" (Some "com.example/zzz") max_key ; 890 + (* add keys with high layer values to exercise deeper tree structure *) 891 + let%lwt mst = Mem_mst.add mst "com.example.record/3jqfcqzm3fs2j" cid1 in 892 + let%lwt mst = Mem_mst.add mst "com.example.record/3jqfcqzm3fn2j" cid1 in 893 + let%lwt min_deep = Mem_mst.get_min_key mst mst.root in 894 + let%lwt max_deep = Mem_mst.get_max_key mst mst.root in 895 + Alcotest.(check (option string)) "deep min" (Some "com.example.record/3jqfcqzm3fn2j") min_deep ; 896 + Alcotest.(check (option string)) "deep max" (Some "com.example/zzz") max_deep ; 897 + Lwt.return_ok () 898 + 863 899 let () = 864 900 let open Alcotest in 865 901 let run_test test = ··· 909 945 ; test_case "mixed incremental ops" `Quick (fun () -> 910 946 run_test test_incremental_mixed_ops_canonicity ) 911 947 ; test_case "incremental edge cases" `Quick (fun () -> 912 - run_test test_incremental_edge_cases ) ] ) ] 948 + run_test test_incremental_edge_cases ) ] ) 949 + ; ( "boundary functions" 950 + , [ test_case "get_min_key and get_max_key" `Quick (fun () -> 951 + run_test test_get_min_max_keys ) ] ) ]