crdt library in ocaml implementing json-joy
at main 26 kB view raw
1(** Indexed document codec. 2 3 The indexed format stores nodes as a flat map keyed by timestamp string. 4 This is useful for: 5 - Incremental sync (fetch individual nodes by ID) 6 - Efficient lookup of specific nodes 7 - Partial document loading 8 9 Format: 10 { 11 "clock": [[sid, time], ...], 12 "root": "sid.time", // Timestamp string for root node 13 "nodes": { 14 "sid.time": {...node data...}, 15 ... 16 } 17 } 18 19 Node formats in the map: 20 - con: {"t": "con", "v": value} 21 - val: {"t": "val", "r": "sid.time"} or {"t": "val"} if empty 22 - obj: {"t": "obj", "m": {"key": "sid.time", ...}} 23 - vec: {"t": "vec", "s": ["sid.time" or null, ...]} 24 - str: {"t": "str", "c": [[sid,time,value_or_span], ...]} 25 - bin: {"t": "bin", "c": [[sid,time,value_or_span], ...]} 26 - arr: {"t": "arr", "c": [[sid,time,ref_or_span], ...]} *) 27 28module J = Simdjsont.Json 29 30let ts_to_string (ts : Clock.timestamp) : string = 31 Printf.sprintf "%d.%d" ts.sid ts.time 32 33let ts_of_string (s : string) : Clock.timestamp option = 34 match String.split_on_char '.' s with 35 | [ sid_s; time_s ] -> ( 36 try Some { Clock.sid = int_of_string sid_s; time = int_of_string time_s } 37 with _ -> None) 38 | _ -> None 39 40let encode_timestamp (ts : Clock.timestamp) : J.t = 41 J.Array [ J.Float (Float.of_int ts.sid); J.Float (Float.of_int ts.time) ] 42 43let decode_timestamp (json : J.t) : Clock.timestamp option = 44 match json with 45 | J.Array [ J.Float sid_f; J.Float time_f ] -> 46 Some { Clock.sid = Float.to_int sid_f; time = Float.to_int time_f } 47 | J.Array [ J.Int sid_i; J.Int time_i ] -> 48 Some { Clock.sid = Int64.to_int sid_i; time = Int64.to_int time_i } 49 | _ -> None 50 51let encode_clock (cv : Clock.clock_vector) : J.t = 52 let local_entry = 53 J.Array 54 [ 55 J.Float (Float.of_int cv.local.clock_sid); 56 J.Float (Float.of_int cv.local.clock_time); 57 ] 58 in 59 let peer_entries = 60 List.map 61 (fun (sid, time) -> 62 J.Array [ J.Float (Float.of_int sid); J.Float (Float.of_int time) ]) 63 cv.peers 64 in 65 J.Array (local_entry :: peer_entries) 66 67let decode_clock (json : J.t) : Clock.clock_vector option = 68 match json with 69 | J.Array (first :: rest) -> ( 70 match decode_timestamp first with 71 | Some ts -> 72 let local : Clock.logical_clock = 73 { clock_sid = ts.sid; clock_time = ts.time } 74 in 75 let peers = 76 List.filter_map 77 (fun entry -> 78 match decode_timestamp entry with 79 | Some ts -> Some (ts.sid, ts.time) 80 | None -> None) 81 rest 82 in 83 Some { Clock.local; peers } 84 | None -> None) 85 | _ -> None 86 87let rec encode_value (v : Value.t) : J.t = 88 match v with 89 | Value.Null -> J.Null 90 | Value.Undefined -> J.Null 91 | Value.Bool b -> J.Bool b 92 | Value.Int i -> J.Float (Float.of_int i) 93 | Value.Float f -> J.Float f 94 | Value.String s -> J.String s 95 | Value.Bytes b -> J.String (Base64.encode_string (Bytes.to_string b)) 96 | Value.Array arr -> J.Array (List.map encode_value arr) 97 | Value.Object pairs -> 98 J.Object (List.map (fun (k, v) -> (k, encode_value v)) pairs) 99 | Value.Timestamp_ref (sid, time) -> J.String (ts_to_string { sid; time }) 100 101let rec decode_value (json : J.t) : Value.t = 102 match json with 103 | J.Null -> Value.Null 104 | J.Bool b -> Value.Bool b 105 | J.Float f -> 106 if Float.is_integer f && Float.abs f < Float.of_int Int.max_int then 107 Value.Int (Float.to_int f) 108 else Value.Float f 109 | J.Int i -> Value.Int (Int64.to_int i) 110 | J.String s -> Value.String s 111 | J.Array items -> Value.Array (List.map decode_value items) 112 | J.Object pairs -> 113 Value.Object (List.map (fun (k, v) -> (k, decode_value v)) pairs) 114 115let encode_node (node : Node.t) : J.t = 116 let type_field t = ("t", J.String t) in 117 match node with 118 | Node.Node_con { con_value; _ } -> 119 J.Object [ type_field "con"; ("v", encode_value con_value) ] 120 | Node.Node_val { val_ref; _ } -> ( 121 match val_ref with 122 | None -> J.Object [ type_field "val" ] 123 | Some ref_ts -> 124 J.Object [ type_field "val"; ("r", J.String (ts_to_string ref_ts)) ]) 125 | Node.Node_obj { obj_entries; _ } -> 126 let map_entries = 127 List.map 128 (fun (entry : Node.obj_entry) -> 129 (entry.obj_key, J.String (ts_to_string entry.obj_value))) 130 obj_entries 131 in 132 J.Object [ type_field "obj"; ("m", J.Object map_entries) ] 133 | Node.Node_vec { vec_slots; _ } -> 134 let max_idx = 135 List.fold_left 136 (fun acc (s : Node.vec_slot) -> max acc s.vec_idx) 137 (-1) vec_slots 138 in 139 let slots_arr = 140 if max_idx < 0 then [] 141 else begin 142 let arr = Array.make (max_idx + 1) J.Null in 143 List.iter 144 (fun (s : Node.vec_slot) -> 145 arr.(s.vec_idx) <- J.String (ts_to_string s.vec_value)) 146 vec_slots; 147 Array.to_list arr 148 end 149 in 150 J.Object [ type_field "vec"; ("s", J.Array slots_arr) ] 151 | Node.Node_str { str_rga; _ } -> 152 let chunks = 153 Rga.fold 154 (fun acc (chunk : string Rga.chunk) -> 155 let chunk_data = 156 if chunk.deleted then 157 J.Array 158 [ 159 J.Float (Float.of_int chunk.id.sid); 160 J.Float (Float.of_int chunk.id.time); 161 J.Float (Float.of_int chunk.span); 162 ] 163 else 164 J.Array 165 [ 166 J.Float (Float.of_int chunk.id.sid); 167 J.Float (Float.of_int chunk.id.time); 168 J.String chunk.data; 169 ] 170 in 171 chunk_data :: acc) 172 [] str_rga 173 in 174 J.Object [ type_field "str"; ("c", J.Array (List.rev chunks)) ] 175 | Node.Node_bin { bin_rga; _ } -> 176 let chunks = 177 Rga.fold 178 (fun acc (chunk : bytes Rga.chunk) -> 179 let chunk_data = 180 if chunk.deleted then 181 J.Array 182 [ 183 J.Float (Float.of_int chunk.id.sid); 184 J.Float (Float.of_int chunk.id.time); 185 J.Float (Float.of_int chunk.span); 186 ] 187 else 188 let b64 = Base64.encode_string (Bytes.to_string chunk.data) in 189 J.Array 190 [ 191 J.Float (Float.of_int chunk.id.sid); 192 J.Float (Float.of_int chunk.id.time); 193 J.String b64; 194 ] 195 in 196 chunk_data :: acc) 197 [] bin_rga 198 in 199 J.Object [ type_field "bin"; ("c", J.Array (List.rev chunks)) ] 200 | Node.Node_arr { arr_rga; _ } -> 201 let chunks = 202 Rga.fold 203 (fun acc (chunk : Clock.timestamp Rga.chunk) -> 204 let chunk_data = 205 if chunk.deleted then 206 J.Array 207 [ 208 J.Float (Float.of_int chunk.id.sid); 209 J.Float (Float.of_int chunk.id.time); 210 J.Float (Float.of_int chunk.span); 211 ] 212 else 213 J.Array 214 [ 215 J.Float (Float.of_int chunk.id.sid); 216 J.Float (Float.of_int chunk.id.time); 217 J.String (ts_to_string chunk.data); 218 ] 219 in 220 chunk_data :: acc) 221 [] arr_rga 222 in 223 J.Object [ type_field "arr"; ("c", J.Array (List.rev chunks)) ] 224 225let encode (model : Model.t) : J.t = 226 let nodes = ref [] in 227 let rec collect (node : Node.t) = 228 let id = Node.id node in 229 nodes := (ts_to_string id, encode_node node) :: !nodes; 230 match node with 231 | Node.Node_val { val_ref = Some ref_ts; _ } -> ( 232 match Model.get_node model ref_ts with 233 | Some child -> collect child 234 | None -> ()) 235 | Node.Node_obj { obj_entries; _ } -> 236 List.iter 237 (fun (entry : Node.obj_entry) -> 238 match Model.get_node model entry.obj_value with 239 | Some child -> collect child 240 | None -> ()) 241 obj_entries 242 | Node.Node_vec { vec_slots; _ } -> 243 List.iter 244 (fun (slot : Node.vec_slot) -> 245 match Model.get_node model slot.vec_value with 246 | Some child -> collect child 247 | None -> ()) 248 vec_slots 249 | Node.Node_arr { arr_rga; _ } -> 250 Rga.iter 251 (fun (chunk : Clock.timestamp Rga.chunk) -> 252 if not chunk.deleted then 253 match Model.get_node model chunk.data with 254 | Some child -> collect child 255 | None -> ()) 256 arr_rga 257 | _ -> () 258 in 259 let root_id = 260 match model.root with 261 | Node.Node_val { val_ref = Some ref_ts; _ } -> ( 262 match Model.get_node model ref_ts with 263 | Some content -> 264 collect content; 265 ts_to_string (Node.id content) 266 | None -> "") 267 | _ -> "" 268 in 269 let nodes_obj = J.Object (List.rev !nodes) in 270 J.Object 271 [ 272 ("clock", encode_clock model.clock); 273 ("root", J.String root_id); 274 ("nodes", nodes_obj); 275 ] 276 277let encode_string ?(minify = false) model = 278 let json = encode model in 279 let _ = minify in 280 J.to_string json 281 282let get_member key (fields : (string * J.t) list) = 283 List.find_map (fun (k, v) -> if k = key then Some v else None) fields 284 285let decode_node model (json : J.t) (id : Clock.timestamp) : Node.t option = 286 match json with 287 | J.Object fields -> ( 288 match get_member "t" fields with 289 | Some (J.String node_type) -> ( 290 match node_type with 291 | "con" -> 292 let value = 293 match get_member "v" fields with 294 | Some v -> decode_value v 295 | None -> Value.Undefined 296 in 297 let node = Node.make_con ~id ~value in 298 Model.add_node model node; 299 Some node 300 | "val" -> 301 let node = Node.make_val ~id in 302 Model.add_node model node; 303 Some node 304 | "obj" -> 305 let node = Node.make_obj ~id in 306 Model.add_node model node; 307 Some node 308 | "vec" -> 309 let node = Node.make_vec ~id in 310 Model.add_node model node; 311 Some node 312 | "str" -> 313 let str_rga = 314 match get_member "c" fields with 315 | Some (J.Array chunks) -> 316 let decoded = 317 List.filter_map 318 (fun chunk -> 319 match chunk with 320 | J.Array 321 [ J.Float sid_f; J.Float time_f; value_or_span ] 322 -> ( 323 let chunk_id = 324 { 325 Clock.sid = Float.to_int sid_f; 326 time = Float.to_int time_f; 327 } 328 in 329 match value_or_span with 330 | J.String s -> 331 Some 332 { 333 Rga.id = chunk_id; 334 span = String.length s; 335 data = s; 336 deleted = false; 337 parent = None; 338 } 339 | J.Float span_f -> 340 let span = Float.to_int span_f in 341 Some 342 { 343 Rga.id = chunk_id; 344 span; 345 data = String.make span ' '; 346 deleted = true; 347 parent = None; 348 } 349 | J.Int span_i -> 350 let span = Int64.to_int span_i in 351 Some 352 { 353 Rga.id = chunk_id; 354 span; 355 data = String.make span ' '; 356 deleted = true; 357 parent = None; 358 } 359 | _ -> None) 360 | J.Array [ J.Int sid_i; J.Int time_i; value_or_span ] 361 -> ( 362 let chunk_id = 363 { 364 Clock.sid = Int64.to_int sid_i; 365 time = Int64.to_int time_i; 366 } 367 in 368 match value_or_span with 369 | J.String s -> 370 Some 371 { 372 Rga.id = chunk_id; 373 span = String.length s; 374 data = s; 375 deleted = false; 376 parent = None; 377 } 378 | J.Float span_f -> 379 let span = Float.to_int span_f in 380 Some 381 { 382 Rga.id = chunk_id; 383 span; 384 data = String.make span ' '; 385 deleted = true; 386 parent = None; 387 } 388 | J.Int span_i -> 389 let span = Int64.to_int span_i in 390 Some 391 { 392 Rga.id = chunk_id; 393 span; 394 data = String.make span ' '; 395 deleted = true; 396 parent = None; 397 } 398 | _ -> None) 399 | _ -> None) 400 chunks 401 in 402 Rga.from_chunks decoded 403 | _ -> Rga.empty () 404 in 405 let node = Node.Node_str { str_id = id; str_rga } in 406 Model.add_node model node; 407 Some node 408 | "bin" -> 409 let bin_rga = 410 match get_member "c" fields with 411 | Some (J.Array chunks) -> 412 let decoded = 413 List.filter_map 414 (fun chunk -> 415 match chunk with 416 | J.Array 417 [ J.Float sid_f; J.Float time_f; value_or_span ] 418 -> ( 419 let chunk_id = 420 { 421 Clock.sid = Float.to_int sid_f; 422 time = Float.to_int time_f; 423 } 424 in 425 match value_or_span with 426 | J.String s -> ( 427 match Base64.decode s with 428 | Ok decoded -> 429 let data = Bytes.of_string decoded in 430 Some 431 { 432 Rga.id = chunk_id; 433 span = Bytes.length data; 434 data; 435 deleted = false; 436 parent = None; 437 } 438 | Error _ -> None) 439 | J.Float span_f -> 440 let span = Float.to_int span_f in 441 Some 442 { 443 Rga.id = chunk_id; 444 span; 445 data = Bytes.make span '\x00'; 446 deleted = true; 447 parent = None; 448 } 449 | J.Int span_i -> 450 let span = Int64.to_int span_i in 451 Some 452 { 453 Rga.id = chunk_id; 454 span; 455 data = Bytes.make span '\x00'; 456 deleted = true; 457 parent = None; 458 } 459 | _ -> None) 460 | _ -> None) 461 chunks 462 in 463 Rga.from_chunks decoded 464 | _ -> Rga.empty () 465 in 466 let node = Node.Node_bin { bin_id = id; bin_rga } in 467 Model.add_node model node; 468 Some node 469 | "arr" -> 470 let node = Node.make_arr ~id in 471 Model.add_node model node; 472 Some node 473 | _ -> None) 474 | _ -> None) 475 | _ -> None 476 477let link_val_ref _model (json : J.t) (node : Node.t) = 478 match (node, json) with 479 | Node.Node_val _, J.Object fields -> ( 480 match get_member "r" fields with 481 | Some (J.String ref_str) -> ( 482 match ts_of_string ref_str with 483 | Some ref_ts -> Node.set_val node ~value:ref_ts 484 | None -> ()) 485 | _ -> ()) 486 | _ -> () 487 488let link_obj_entries _model (json : J.t) (node : Node.t) = 489 match (node, json) with 490 | Node.Node_obj _, J.Object fields -> ( 491 match get_member "m" fields with 492 | Some (J.Object map_entries) -> 493 List.iter 494 (fun (key, value) -> 495 match value with 496 | J.String ref_str -> ( 497 match ts_of_string ref_str with 498 | Some ref_ts -> 499 Node.set_obj_key node ~key ~value:ref_ts ~write_ts:ref_ts 500 | None -> ()) 501 | _ -> ()) 502 map_entries 503 | _ -> ()) 504 | _ -> () 505 506let link_vec_slots _model (json : J.t) (node : Node.t) = 507 match (node, json) with 508 | Node.Node_vec _, J.Object fields -> ( 509 match get_member "s" fields with 510 | Some (J.Array slots) -> 511 List.iteri 512 (fun idx slot -> 513 match slot with 514 | J.String ref_str -> ( 515 match ts_of_string ref_str with 516 | Some ref_ts -> 517 Node.set_vec_slot node ~idx ~value:ref_ts ~write_ts:ref_ts 518 | None -> ()) 519 | _ -> ()) 520 slots 521 | _ -> ()) 522 | _ -> () 523 524let link_arr_elements _model (json : J.t) (node : Node.t) = 525 match (node, json) with 526 | Node.Node_arr _, J.Object fields -> ( 527 match get_member "c" fields with 528 | Some (J.Array chunks) -> 529 List.iter 530 (fun chunk -> 531 match chunk with 532 | J.Array [ J.Float sid_f; J.Float time_f; ref_or_span ] -> ( 533 let chunk_id = 534 { 535 Clock.sid = Float.to_int sid_f; 536 time = Float.to_int time_f; 537 } 538 in 539 match ref_or_span with 540 | J.String ref_str -> ( 541 match ts_of_string ref_str with 542 | Some ref_ts -> 543 let after = 544 match node with 545 | Node.Node_arr { arr_rga; _ } -> 546 Rga.find_last_id arr_rga 547 | _ -> None 548 in 549 Node.insert_arr node ~after ~chunk_id ~value:ref_ts 550 | None -> ()) 551 | J.Float span_f -> 552 let span = Float.to_int span_f in 553 let after = 554 match node with 555 | Node.Node_arr { arr_rga; _ } -> 556 Rga.find_last_id arr_rga 557 | _ -> None 558 in 559 Node.insert_arr node ~after ~chunk_id ~value:chunk_id; 560 Node.delete_range node 561 ~spans: 562 [ { sid = chunk_id.sid; time = chunk_id.time; span } ] 563 | J.Int span_i -> 564 let span = Int64.to_int span_i in 565 let after = 566 match node with 567 | Node.Node_arr { arr_rga; _ } -> 568 Rga.find_last_id arr_rga 569 | _ -> None 570 in 571 Node.insert_arr node ~after ~chunk_id ~value:chunk_id; 572 Node.delete_range node 573 ~spans: 574 [ { sid = chunk_id.sid; time = chunk_id.time; span } ] 575 | _ -> ()) 576 | J.Array [ J.Int sid_i; J.Int time_i; ref_or_span ] -> ( 577 let chunk_id = 578 { 579 Clock.sid = Int64.to_int sid_i; 580 time = Int64.to_int time_i; 581 } 582 in 583 match ref_or_span with 584 | J.String ref_str -> ( 585 match ts_of_string ref_str with 586 | Some ref_ts -> 587 let after = 588 match node with 589 | Node.Node_arr { arr_rga; _ } -> 590 Rga.find_last_id arr_rga 591 | _ -> None 592 in 593 Node.insert_arr node ~after ~chunk_id ~value:ref_ts 594 | None -> ()) 595 | J.Float span_f -> 596 let span = Float.to_int span_f in 597 let after = 598 match node with 599 | Node.Node_arr { arr_rga; _ } -> 600 Rga.find_last_id arr_rga 601 | _ -> None 602 in 603 Node.insert_arr node ~after ~chunk_id ~value:chunk_id; 604 Node.delete_range node 605 ~spans: 606 [ { sid = chunk_id.sid; time = chunk_id.time; span } ] 607 | J.Int span_i -> 608 let span = Int64.to_int span_i in 609 let after = 610 match node with 611 | Node.Node_arr { arr_rga; _ } -> 612 Rga.find_last_id arr_rga 613 | _ -> None 614 in 615 Node.insert_arr node ~after ~chunk_id ~value:chunk_id; 616 Node.delete_range node 617 ~spans: 618 [ { sid = chunk_id.sid; time = chunk_id.time; span } ] 619 | _ -> ()) 620 | _ -> ()) 621 chunks 622 | _ -> ()) 623 | _ -> () 624 625let decode (json : J.t) : Model.t option = 626 match json with 627 | J.Object fields -> ( 628 match get_member "clock" fields with 629 | Some clock_json -> ( 630 match decode_clock clock_json with 631 | Some clock -> ( 632 let model = Model.create clock.local.clock_sid in 633 model.clock.local.clock_time <- clock.local.clock_time; 634 model.clock.peers <- clock.peers; 635 let node_jsons = ref [] in 636 (match get_member "nodes" fields with 637 | Some (J.Object node_entries) -> 638 List.iter 639 (fun (key, node_json) -> 640 match ts_of_string key with 641 | Some id -> ( 642 match decode_node model node_json id with 643 | Some node -> 644 node_jsons := (node_json, node) :: !node_jsons 645 | None -> ()) 646 | None -> ()) 647 node_entries 648 | _ -> ()); 649 List.iter 650 (fun (json, node) -> 651 link_val_ref model json node; 652 link_obj_entries model json node; 653 link_vec_slots model json node; 654 link_arr_elements model json node) 655 !node_jsons; 656 match get_member "root" fields with 657 | Some (J.String root_str) -> ( 658 match ts_of_string root_str with 659 | Some root_ts -> 660 Node.set_val model.root ~value:root_ts; 661 Some model 662 | None -> Some model) 663 | _ -> Some model) 664 | None -> None) 665 | None -> None) 666 | _ -> None 667 668let decode_string s : Model.t option = 669 match Simdjsont.Decode.decode_string Simdjsont.Decode.value s with 670 | Ok json -> decode json 671 | Error e -> 672 Printf.eprintf "JSON parse error: %s\n" e; 673 None 674 675let get_node_json (json : J.t) (ts : Clock.timestamp) : J.t option = 676 match json with 677 | J.Object fields -> ( 678 match get_member "nodes" fields with 679 | Some (J.Object node_entries) -> 680 let key = ts_to_string ts in 681 get_member key node_entries 682 | _ -> None) 683 | _ -> None 684 685let get_node_ids (json : J.t) : Clock.timestamp list = 686 match json with 687 | J.Object fields -> ( 688 match get_member "nodes" fields with 689 | Some (J.Object node_entries) -> 690 List.filter_map (fun (key, _) -> ts_of_string key) node_entries 691 | _ -> []) 692 | _ -> []