+70
-35
mist/lib/mst.ml
+70
-35
mist/lib/mst.ml
···
235
235
236
236
val leaves_of_root : t -> (string * Cid.t) list Lwt.t
237
237
238
+
val get_min_key : t -> Cid.t -> string option Lwt.t
239
+
240
+
val get_max_key : t -> Cid.t -> string option Lwt.t
241
+
238
242
val equal : t -> t -> bool Lwt.t
239
243
end
240
244
···
424
428
(r, full_key) :: acc
425
429
| None ->
426
430
acc )
427
-
( match raw.l with
428
-
| Some l ->
429
-
[(l, prefix)]
430
-
| None ->
431
-
[] )
431
+
(match raw.l with Some l -> [(l, prefix)] | None -> [])
432
432
raw.e
433
433
in
434
434
(Cid.Set.add cid visited, List.rev_append next_pairs queue) )
···
496
496
(List.rev acc, seen)
497
497
| `Node cid :: rest ->
498
498
if
499
-
Cid.Set.mem cid missing
500
-
|| Block_map.has cid cache
499
+
Cid.Set.mem cid missing || Block_map.has cid cache
501
500
|| Cid.Set.mem cid seen
502
501
then collect acc seen remaining rest
503
502
else
···
512
511
let cache' =
513
512
List.fold_left
514
513
(fun acc (cid, bytes) -> Block_map.set cid bytes acc)
515
-
cache (Block_map.entries bm.blocks)
514
+
cache
515
+
(Block_map.entries bm.blocks)
516
516
in
517
517
let missing' =
518
-
List.fold_left
519
-
(fun acc cid -> Cid.Set.add cid acc)
520
-
missing bm.missing
518
+
List.fold_left (fun acc cid -> Cid.Set.add cid acc) missing bm.missing
521
519
in
522
520
Lwt.return (cache', missing')
523
521
in
···
527
525
Lwt.return_none
528
526
| `Leaf cid :: rest ->
529
527
Lwt.return_some ((Leaf cid : ordered_item), (rest, cache, missing))
530
-
| `Node cid :: rest ->
528
+
| `Node cid :: rest -> (
531
529
if Cid.Set.mem cid missing then step (rest, cache, missing)
532
530
else
533
-
( match Block_map.get cid cache with
531
+
match Block_map.get cid cache with
534
532
| None ->
535
533
let%lwt cache', missing' = prefetch queue cache missing in
536
534
if cache' == cache && Cid.Set.mem cid missing' then
···
554
552
let new_queue = left_queue @ entries_queue @ rest in
555
553
let cache' = Block_map.remove cid cache in
556
554
Lwt.return_some
557
-
((Node (cid, bytes) : ordered_item), (new_queue, cache', missing))
558
-
)
555
+
( (Node (cid, bytes) : ordered_item)
556
+
, (new_queue, cache', missing) ) )
559
557
in
560
558
Lwt_seq.unfold_lwt step ([`Node t.root], Block_map.empty, Cid.Set.empty)
561
559
···
565
563
let entries =
566
564
if entries_are_sorted node.entries then node.entries
567
565
else
568
-
List.sort (fun (a : entry) b -> String.compare a.key b.key)
566
+
List.sort
567
+
(fun (a : entry) b -> String.compare a.key b.key)
569
568
node.entries
570
569
in
571
570
let%lwt left =
···
612
611
| Error e ->
613
612
raise e
614
613
in
615
-
try%lwt Lwt.map Result.ok (aux node)
616
-
with e -> Lwt.return_error e
614
+
try%lwt Lwt.map Result.ok (aux node) with e -> Lwt.return_error e
617
615
618
616
(* raw-node helpers for covering proofs: operate on stored bytes, not re-serialization *)
619
617
type interleaved_entry =
···
769
767
let missing = ref Cid.Set.empty in
770
768
let acc = ref Block_map.empty in
771
769
let add_block cid bytes =
772
-
if not (Block_map.has cid !acc) then
773
-
acc := Block_map.set cid bytes !acc
770
+
if not (Block_map.has cid !acc) then acc := Block_map.set cid bytes !acc
774
771
in
775
772
let get_bytes_cached cid =
776
773
match Block_map.get cid !cache with
···
792
789
| None ->
793
790
Lwt.return_unit
794
791
| Some leaf_cid -> (
795
-
match%lwt get_bytes_cached leaf_cid with
796
-
| Some bytes ->
797
-
add_block leaf_cid bytes ;
798
-
Lwt.return_unit
799
-
| None ->
800
-
Lwt.return_unit )
792
+
match%lwt get_bytes_cached leaf_cid with
793
+
| Some bytes ->
794
+
add_block leaf_cid bytes ; Lwt.return_unit
795
+
| None ->
796
+
Lwt.return_unit )
801
797
in
802
798
let rec proof_for_key_cached cid key =
803
799
match%lwt get_bytes_cached cid with
804
800
| None ->
805
801
Lwt.return_unit
806
-
| Some bytes ->
802
+
| Some bytes -> (
807
803
add_block cid bytes ;
808
804
let raw = decode_block_raw bytes in
809
805
let keys = node_entry_keys raw in
810
806
let seq = interleave_raw raw keys in
811
807
let index = find_gte_leaf_index key seq in
812
-
( match List.nth_opt seq index with
808
+
match List.nth_opt seq index with
813
809
| Some (Leaf (k, _, _)) when k = key ->
814
810
Lwt.return_unit
815
811
| Some (Leaf (_k, v_right, _)) -> (
···
896
892
, (cid, bytes) :: nodes
897
893
, leaves'
898
894
, List.rev_append next_cids queue ) )
899
-
(visited, nodes, leaves, rest) batch
895
+
(visited, nodes, leaves, rest)
896
+
batch
900
897
in
901
898
loop next_queue visited' nodes' leaves'
902
899
in
···
1212
1209
| None ->
1213
1210
Lwt.return []
1214
1211
1212
+
(* returns the minimum key in a subtree by following the leftmost path *)
1213
+
let rec get_min_key (t : t) (cid : Cid.t) : string option Lwt.t =
1214
+
match%lwt retrieve_node_raw t cid with
1215
+
| None ->
1216
+
Lwt.return_none
1217
+
| Some raw -> (
1218
+
match raw.l with
1219
+
| Some left_cid ->
1220
+
get_min_key t left_cid
1221
+
| None -> (
1222
+
match raw.e with
1223
+
| [] ->
1224
+
Lwt.return_none
1225
+
| first :: _ ->
1226
+
Lwt.return_some (Bytes.to_string first.k) ) )
1227
+
1228
+
(* returns the maximum key in a subtree by following the rightmost path *)
1229
+
let rec get_max_key (t : t) (cid : Cid.t) : string option Lwt.t =
1230
+
match%lwt retrieve_node_raw t cid with
1231
+
| None ->
1232
+
Lwt.return_none
1233
+
| Some raw -> (
1234
+
let keys = decompress_keys raw in
1235
+
match List.rev (List.combine keys raw.e) with
1236
+
| [] -> (
1237
+
match raw.l with
1238
+
| Some left_cid ->
1239
+
get_max_key t left_cid
1240
+
| None ->
1241
+
Lwt.return_none )
1242
+
| (last_key, last_entry) :: _ -> (
1243
+
match last_entry.t with
1244
+
| Some right_cid ->
1245
+
get_max_key t right_cid
1246
+
| None ->
1247
+
Lwt.return_some last_key ) )
1248
+
1215
1249
(* rebuild a subtree from leaves
1216
1250
returns (root_cid option, actual_layer) *)
1217
1251
let rebuild_subtree (blockstore : bs) (leaves : (string * Cid.t) list) :
···
1265
1299
let%lwt wrapped_old =
1266
1300
wrap_to_layer t.blockstore old_root_cid old_root_layer (key_layer - 1)
1267
1301
in
1268
-
(* get all keys from old tree to determine position *)
1269
-
let%lwt old_leaves = collect_subtree_leaves t old_root_cid in
1270
-
let old_keys = List.map fst old_leaves in
1271
-
let all_less = List.for_all (fun k -> k < key) old_keys in
1272
-
let all_greater = List.for_all (fun k -> k > key) old_keys in
1302
+
(* check boundary keys to determine position *)
1303
+
let%lwt min_key = get_min_key t old_root_cid in
1304
+
let%lwt max_key = get_max_key t old_root_cid in
1305
+
let all_less = match max_key with Some mx -> mx < key | None -> true in
1306
+
let all_greater = match min_key with Some mn -> mn > key | None -> true in
1273
1307
if all_less then
1274
1308
(* all old keys < new key: old tree is left, new entry has no right *)
1275
1309
let entries = compress_entries [(key, value, None)] in
···
1279
1313
let entries = compress_entries [(key, value, Some wrapped_old)] in
1280
1314
persist_node_raw t.blockstore {l= None; e= entries}
1281
1315
else
1282
-
(* key is in the middle: need to split *)
1316
+
(* key is in the middle: need to split; collect all leaves *)
1317
+
let%lwt old_leaves = collect_subtree_leaves t old_root_cid in
1283
1318
let left_leaves = List.filter (fun (k, _) -> k < key) old_leaves in
1284
1319
let right_leaves = List.filter (fun (k, _) -> k > key) old_leaves in
1285
1320
let%lwt left_cid, left_layer = rebuild_subtree t.blockstore left_leaves in
+40
-1
mist/test/test_mst.ml
+40
-1
mist/test/test_mst.ml
···
860
860
Alcotest.fail "key should exist after update" ) ;
861
861
Lwt.return_ok ()
862
862
863
+
let test_get_min_max_keys () =
864
+
let store = Storage.Memory_blockstore.create () in
865
+
let cid1 =
866
+
cid_of_string_exn
867
+
"bafyreie5cvv4h45feadgeuwhbcutmh6t2ceseocckahdoe6uat64zmz454"
868
+
in
869
+
let* mst = Mem_mst.create_empty store in
870
+
(* empty tree *)
871
+
let%lwt min_empty = Mem_mst.get_min_key mst mst.root in
872
+
let%lwt max_empty = Mem_mst.get_max_key mst mst.root in
873
+
Alcotest.(check (option string)) "empty min" None min_empty ;
874
+
Alcotest.(check (option string)) "empty max" None max_empty ;
875
+
(* single entry *)
876
+
let%lwt mst = Mem_mst.add mst "com.example/mmm" cid1 in
877
+
let%lwt min_single = Mem_mst.get_min_key mst mst.root in
878
+
let%lwt max_single = Mem_mst.get_max_key mst mst.root in
879
+
Alcotest.(check (option string)) "single min" (Some "com.example/mmm") min_single ;
880
+
Alcotest.(check (option string)) "single max" (Some "com.example/mmm") max_single ;
881
+
(* multiple entries at different layers *)
882
+
let%lwt mst = Mem_mst.add mst "com.example/aaa" cid1 in
883
+
let%lwt mst = Mem_mst.add mst "com.example/zzz" cid1 in
884
+
let%lwt mst = Mem_mst.add mst "com.example/bbb" cid1 in
885
+
let%lwt mst = Mem_mst.add mst "com.example/yyy" cid1 in
886
+
let%lwt min_key = Mem_mst.get_min_key mst mst.root in
887
+
let%lwt max_key = Mem_mst.get_max_key mst mst.root in
888
+
Alcotest.(check (option string)) "multi min" (Some "com.example/aaa") min_key ;
889
+
Alcotest.(check (option string)) "multi max" (Some "com.example/zzz") max_key ;
890
+
(* add keys with high layer values to exercise deeper tree structure *)
891
+
let%lwt mst = Mem_mst.add mst "com.example.record/3jqfcqzm3fs2j" cid1 in
892
+
let%lwt mst = Mem_mst.add mst "com.example.record/3jqfcqzm3fn2j" cid1 in
893
+
let%lwt min_deep = Mem_mst.get_min_key mst mst.root in
894
+
let%lwt max_deep = Mem_mst.get_max_key mst mst.root in
895
+
Alcotest.(check (option string)) "deep min" (Some "com.example.record/3jqfcqzm3fn2j") min_deep ;
896
+
Alcotest.(check (option string)) "deep max" (Some "com.example/zzz") max_deep ;
897
+
Lwt.return_ok ()
898
+
863
899
let () =
864
900
let open Alcotest in
865
901
let run_test test =
···
909
945
; test_case "mixed incremental ops" `Quick (fun () ->
910
946
run_test test_incremental_mixed_ops_canonicity )
911
947
; test_case "incremental edge cases" `Quick (fun () ->
912
-
run_test test_incremental_edge_cases ) ] ) ]
948
+
run_test test_incremental_edge_cases ) ] )
949
+
; ( "boundary functions"
950
+
, [ test_case "get_min_key and get_max_key" `Quick (fun () ->
951
+
run_test test_get_min_max_keys ) ] ) ]