crdt library in ocaml implementing json-joy
1(** Indexed document codec.
2
3 The indexed format stores nodes as a flat map keyed by timestamp string.
4 This is useful for:
5 - Incremental sync (fetch individual nodes by ID)
6 - Efficient lookup of specific nodes
7 - Partial document loading
8
9 Format:
10 {
11 "clock": [[sid, time], ...],
12 "root": "sid.time", // Timestamp string for root node
13 "nodes": {
14 "sid.time": {...node data...},
15 ...
16 }
17 }
18
19 Node formats in the map:
20 - con: {"t": "con", "v": value}
21 - val: {"t": "val", "r": "sid.time"} or {"t": "val"} if empty
22 - obj: {"t": "obj", "m": {"key": "sid.time", ...}}
23 - vec: {"t": "vec", "s": ["sid.time" or null, ...]}
24 - str: {"t": "str", "c": [[sid,time,value_or_span], ...]}
25 - bin: {"t": "bin", "c": [[sid,time,value_or_span], ...]}
26 - arr: {"t": "arr", "c": [[sid,time,ref_or_span], ...]} *)
27
28module J = Simdjsont.Json
29
30let ts_to_string (ts : Clock.timestamp) : string =
31 Printf.sprintf "%d.%d" ts.sid ts.time
32
33let ts_of_string (s : string) : Clock.timestamp option =
34 match String.split_on_char '.' s with
35 | [ sid_s; time_s ] -> (
36 try Some { Clock.sid = int_of_string sid_s; time = int_of_string time_s }
37 with _ -> None)
38 | _ -> None
39
40let encode_timestamp (ts : Clock.timestamp) : J.t =
41 J.Array [ J.Float (Float.of_int ts.sid); J.Float (Float.of_int ts.time) ]
42
43let decode_timestamp (json : J.t) : Clock.timestamp option =
44 match json with
45 | J.Array [ J.Float sid_f; J.Float time_f ] ->
46 Some { Clock.sid = Float.to_int sid_f; time = Float.to_int time_f }
47 | J.Array [ J.Int sid_i; J.Int time_i ] ->
48 Some { Clock.sid = Int64.to_int sid_i; time = Int64.to_int time_i }
49 | _ -> None
50
51let encode_clock (cv : Clock.clock_vector) : J.t =
52 let local_entry =
53 J.Array
54 [
55 J.Float (Float.of_int cv.local.clock_sid);
56 J.Float (Float.of_int cv.local.clock_time);
57 ]
58 in
59 let peer_entries =
60 List.map
61 (fun (sid, time) ->
62 J.Array [ J.Float (Float.of_int sid); J.Float (Float.of_int time) ])
63 cv.peers
64 in
65 J.Array (local_entry :: peer_entries)
66
67let decode_clock (json : J.t) : Clock.clock_vector option =
68 match json with
69 | J.Array (first :: rest) -> (
70 match decode_timestamp first with
71 | Some ts ->
72 let local : Clock.logical_clock =
73 { clock_sid = ts.sid; clock_time = ts.time }
74 in
75 let peers =
76 List.filter_map
77 (fun entry ->
78 match decode_timestamp entry with
79 | Some ts -> Some (ts.sid, ts.time)
80 | None -> None)
81 rest
82 in
83 Some { Clock.local; peers }
84 | None -> None)
85 | _ -> None
86
87let rec encode_value (v : Value.t) : J.t =
88 match v with
89 | Value.Null -> J.Null
90 | Value.Undefined -> J.Null
91 | Value.Bool b -> J.Bool b
92 | Value.Int i -> J.Float (Float.of_int i)
93 | Value.Float f -> J.Float f
94 | Value.String s -> J.String s
95 | Value.Bytes b -> J.String (Base64.encode_string (Bytes.to_string b))
96 | Value.Array arr -> J.Array (List.map encode_value arr)
97 | Value.Object pairs ->
98 J.Object (List.map (fun (k, v) -> (k, encode_value v)) pairs)
99 | Value.Timestamp_ref (sid, time) -> J.String (ts_to_string { sid; time })
100
101let rec decode_value (json : J.t) : Value.t =
102 match json with
103 | J.Null -> Value.Null
104 | J.Bool b -> Value.Bool b
105 | J.Float f ->
106 if Float.is_integer f && Float.abs f < Float.of_int Int.max_int then
107 Value.Int (Float.to_int f)
108 else Value.Float f
109 | J.Int i -> Value.Int (Int64.to_int i)
110 | J.String s -> Value.String s
111 | J.Array items -> Value.Array (List.map decode_value items)
112 | J.Object pairs ->
113 Value.Object (List.map (fun (k, v) -> (k, decode_value v)) pairs)
114
115let encode_node (node : Node.t) : J.t =
116 let type_field t = ("t", J.String t) in
117 match node with
118 | Node.Node_con { con_value; _ } ->
119 J.Object [ type_field "con"; ("v", encode_value con_value) ]
120 | Node.Node_val { val_ref; _ } -> (
121 match val_ref with
122 | None -> J.Object [ type_field "val" ]
123 | Some ref_ts ->
124 J.Object [ type_field "val"; ("r", J.String (ts_to_string ref_ts)) ])
125 | Node.Node_obj { obj_entries; _ } ->
126 let map_entries =
127 List.map
128 (fun (entry : Node.obj_entry) ->
129 (entry.obj_key, J.String (ts_to_string entry.obj_value)))
130 obj_entries
131 in
132 J.Object [ type_field "obj"; ("m", J.Object map_entries) ]
133 | Node.Node_vec { vec_slots; _ } ->
134 let max_idx =
135 List.fold_left
136 (fun acc (s : Node.vec_slot) -> max acc s.vec_idx)
137 (-1) vec_slots
138 in
139 let slots_arr =
140 if max_idx < 0 then []
141 else begin
142 let arr = Array.make (max_idx + 1) J.Null in
143 List.iter
144 (fun (s : Node.vec_slot) ->
145 arr.(s.vec_idx) <- J.String (ts_to_string s.vec_value))
146 vec_slots;
147 Array.to_list arr
148 end
149 in
150 J.Object [ type_field "vec"; ("s", J.Array slots_arr) ]
151 | Node.Node_str { str_rga; _ } ->
152 let chunks =
153 Rga.fold
154 (fun acc (chunk : string Rga.chunk) ->
155 let chunk_data =
156 if chunk.deleted then
157 J.Array
158 [
159 J.Float (Float.of_int chunk.id.sid);
160 J.Float (Float.of_int chunk.id.time);
161 J.Float (Float.of_int chunk.span);
162 ]
163 else
164 J.Array
165 [
166 J.Float (Float.of_int chunk.id.sid);
167 J.Float (Float.of_int chunk.id.time);
168 J.String chunk.data;
169 ]
170 in
171 chunk_data :: acc)
172 [] str_rga
173 in
174 J.Object [ type_field "str"; ("c", J.Array (List.rev chunks)) ]
175 | Node.Node_bin { bin_rga; _ } ->
176 let chunks =
177 Rga.fold
178 (fun acc (chunk : bytes Rga.chunk) ->
179 let chunk_data =
180 if chunk.deleted then
181 J.Array
182 [
183 J.Float (Float.of_int chunk.id.sid);
184 J.Float (Float.of_int chunk.id.time);
185 J.Float (Float.of_int chunk.span);
186 ]
187 else
188 let b64 = Base64.encode_string (Bytes.to_string chunk.data) in
189 J.Array
190 [
191 J.Float (Float.of_int chunk.id.sid);
192 J.Float (Float.of_int chunk.id.time);
193 J.String b64;
194 ]
195 in
196 chunk_data :: acc)
197 [] bin_rga
198 in
199 J.Object [ type_field "bin"; ("c", J.Array (List.rev chunks)) ]
200 | Node.Node_arr { arr_rga; _ } ->
201 let chunks =
202 Rga.fold
203 (fun acc (chunk : Clock.timestamp Rga.chunk) ->
204 let chunk_data =
205 if chunk.deleted then
206 J.Array
207 [
208 J.Float (Float.of_int chunk.id.sid);
209 J.Float (Float.of_int chunk.id.time);
210 J.Float (Float.of_int chunk.span);
211 ]
212 else
213 J.Array
214 [
215 J.Float (Float.of_int chunk.id.sid);
216 J.Float (Float.of_int chunk.id.time);
217 J.String (ts_to_string chunk.data);
218 ]
219 in
220 chunk_data :: acc)
221 [] arr_rga
222 in
223 J.Object [ type_field "arr"; ("c", J.Array (List.rev chunks)) ]
224
225let encode (model : Model.t) : J.t =
226 let nodes = ref [] in
227 let rec collect (node : Node.t) =
228 let id = Node.id node in
229 nodes := (ts_to_string id, encode_node node) :: !nodes;
230 match node with
231 | Node.Node_val { val_ref = Some ref_ts; _ } -> (
232 match Model.get_node model ref_ts with
233 | Some child -> collect child
234 | None -> ())
235 | Node.Node_obj { obj_entries; _ } ->
236 List.iter
237 (fun (entry : Node.obj_entry) ->
238 match Model.get_node model entry.obj_value with
239 | Some child -> collect child
240 | None -> ())
241 obj_entries
242 | Node.Node_vec { vec_slots; _ } ->
243 List.iter
244 (fun (slot : Node.vec_slot) ->
245 match Model.get_node model slot.vec_value with
246 | Some child -> collect child
247 | None -> ())
248 vec_slots
249 | Node.Node_arr { arr_rga; _ } ->
250 Rga.iter
251 (fun (chunk : Clock.timestamp Rga.chunk) ->
252 if not chunk.deleted then
253 match Model.get_node model chunk.data with
254 | Some child -> collect child
255 | None -> ())
256 arr_rga
257 | _ -> ()
258 in
259 let root_id =
260 match model.root with
261 | Node.Node_val { val_ref = Some ref_ts; _ } -> (
262 match Model.get_node model ref_ts with
263 | Some content ->
264 collect content;
265 ts_to_string (Node.id content)
266 | None -> "")
267 | _ -> ""
268 in
269 let nodes_obj = J.Object (List.rev !nodes) in
270 J.Object
271 [
272 ("clock", encode_clock model.clock);
273 ("root", J.String root_id);
274 ("nodes", nodes_obj);
275 ]
276
277let encode_string ?(minify = false) model =
278 let json = encode model in
279 let _ = minify in
280 J.to_string json
281
282let get_member key (fields : (string * J.t) list) =
283 List.find_map (fun (k, v) -> if k = key then Some v else None) fields
284
285let decode_node model (json : J.t) (id : Clock.timestamp) : Node.t option =
286 match json with
287 | J.Object fields -> (
288 match get_member "t" fields with
289 | Some (J.String node_type) -> (
290 match node_type with
291 | "con" ->
292 let value =
293 match get_member "v" fields with
294 | Some v -> decode_value v
295 | None -> Value.Undefined
296 in
297 let node = Node.make_con ~id ~value in
298 Model.add_node model node;
299 Some node
300 | "val" ->
301 let node = Node.make_val ~id in
302 Model.add_node model node;
303 Some node
304 | "obj" ->
305 let node = Node.make_obj ~id in
306 Model.add_node model node;
307 Some node
308 | "vec" ->
309 let node = Node.make_vec ~id in
310 Model.add_node model node;
311 Some node
312 | "str" ->
313 let str_rga =
314 match get_member "c" fields with
315 | Some (J.Array chunks) ->
316 let decoded =
317 List.filter_map
318 (fun chunk ->
319 match chunk with
320 | J.Array
321 [ J.Float sid_f; J.Float time_f; value_or_span ]
322 -> (
323 let chunk_id =
324 {
325 Clock.sid = Float.to_int sid_f;
326 time = Float.to_int time_f;
327 }
328 in
329 match value_or_span with
330 | J.String s ->
331 Some
332 {
333 Rga.id = chunk_id;
334 span = String.length s;
335 data = s;
336 deleted = false;
337 parent = None;
338 }
339 | J.Float span_f ->
340 let span = Float.to_int span_f in
341 Some
342 {
343 Rga.id = chunk_id;
344 span;
345 data = String.make span ' ';
346 deleted = true;
347 parent = None;
348 }
349 | J.Int span_i ->
350 let span = Int64.to_int span_i in
351 Some
352 {
353 Rga.id = chunk_id;
354 span;
355 data = String.make span ' ';
356 deleted = true;
357 parent = None;
358 }
359 | _ -> None)
360 | J.Array [ J.Int sid_i; J.Int time_i; value_or_span ]
361 -> (
362 let chunk_id =
363 {
364 Clock.sid = Int64.to_int sid_i;
365 time = Int64.to_int time_i;
366 }
367 in
368 match value_or_span with
369 | J.String s ->
370 Some
371 {
372 Rga.id = chunk_id;
373 span = String.length s;
374 data = s;
375 deleted = false;
376 parent = None;
377 }
378 | J.Float span_f ->
379 let span = Float.to_int span_f in
380 Some
381 {
382 Rga.id = chunk_id;
383 span;
384 data = String.make span ' ';
385 deleted = true;
386 parent = None;
387 }
388 | J.Int span_i ->
389 let span = Int64.to_int span_i in
390 Some
391 {
392 Rga.id = chunk_id;
393 span;
394 data = String.make span ' ';
395 deleted = true;
396 parent = None;
397 }
398 | _ -> None)
399 | _ -> None)
400 chunks
401 in
402 Rga.from_chunks decoded
403 | _ -> Rga.empty ()
404 in
405 let node = Node.Node_str { str_id = id; str_rga } in
406 Model.add_node model node;
407 Some node
408 | "bin" ->
409 let bin_rga =
410 match get_member "c" fields with
411 | Some (J.Array chunks) ->
412 let decoded =
413 List.filter_map
414 (fun chunk ->
415 match chunk with
416 | J.Array
417 [ J.Float sid_f; J.Float time_f; value_or_span ]
418 -> (
419 let chunk_id =
420 {
421 Clock.sid = Float.to_int sid_f;
422 time = Float.to_int time_f;
423 }
424 in
425 match value_or_span with
426 | J.String s -> (
427 match Base64.decode s with
428 | Ok decoded ->
429 let data = Bytes.of_string decoded in
430 Some
431 {
432 Rga.id = chunk_id;
433 span = Bytes.length data;
434 data;
435 deleted = false;
436 parent = None;
437 }
438 | Error _ -> None)
439 | J.Float span_f ->
440 let span = Float.to_int span_f in
441 Some
442 {
443 Rga.id = chunk_id;
444 span;
445 data = Bytes.make span '\x00';
446 deleted = true;
447 parent = None;
448 }
449 | J.Int span_i ->
450 let span = Int64.to_int span_i in
451 Some
452 {
453 Rga.id = chunk_id;
454 span;
455 data = Bytes.make span '\x00';
456 deleted = true;
457 parent = None;
458 }
459 | _ -> None)
460 | _ -> None)
461 chunks
462 in
463 Rga.from_chunks decoded
464 | _ -> Rga.empty ()
465 in
466 let node = Node.Node_bin { bin_id = id; bin_rga } in
467 Model.add_node model node;
468 Some node
469 | "arr" ->
470 let node = Node.make_arr ~id in
471 Model.add_node model node;
472 Some node
473 | _ -> None)
474 | _ -> None)
475 | _ -> None
476
477let link_val_ref _model (json : J.t) (node : Node.t) =
478 match (node, json) with
479 | Node.Node_val _, J.Object fields -> (
480 match get_member "r" fields with
481 | Some (J.String ref_str) -> (
482 match ts_of_string ref_str with
483 | Some ref_ts -> Node.set_val node ~value:ref_ts
484 | None -> ())
485 | _ -> ())
486 | _ -> ()
487
488let link_obj_entries _model (json : J.t) (node : Node.t) =
489 match (node, json) with
490 | Node.Node_obj _, J.Object fields -> (
491 match get_member "m" fields with
492 | Some (J.Object map_entries) ->
493 List.iter
494 (fun (key, value) ->
495 match value with
496 | J.String ref_str -> (
497 match ts_of_string ref_str with
498 | Some ref_ts ->
499 Node.set_obj_key node ~key ~value:ref_ts ~write_ts:ref_ts
500 | None -> ())
501 | _ -> ())
502 map_entries
503 | _ -> ())
504 | _ -> ()
505
506let link_vec_slots _model (json : J.t) (node : Node.t) =
507 match (node, json) with
508 | Node.Node_vec _, J.Object fields -> (
509 match get_member "s" fields with
510 | Some (J.Array slots) ->
511 List.iteri
512 (fun idx slot ->
513 match slot with
514 | J.String ref_str -> (
515 match ts_of_string ref_str with
516 | Some ref_ts ->
517 Node.set_vec_slot node ~idx ~value:ref_ts ~write_ts:ref_ts
518 | None -> ())
519 | _ -> ())
520 slots
521 | _ -> ())
522 | _ -> ()
523
524let link_arr_elements _model (json : J.t) (node : Node.t) =
525 match (node, json) with
526 | Node.Node_arr _, J.Object fields -> (
527 match get_member "c" fields with
528 | Some (J.Array chunks) ->
529 List.iter
530 (fun chunk ->
531 match chunk with
532 | J.Array [ J.Float sid_f; J.Float time_f; ref_or_span ] -> (
533 let chunk_id =
534 {
535 Clock.sid = Float.to_int sid_f;
536 time = Float.to_int time_f;
537 }
538 in
539 match ref_or_span with
540 | J.String ref_str -> (
541 match ts_of_string ref_str with
542 | Some ref_ts ->
543 let after =
544 match node with
545 | Node.Node_arr { arr_rga; _ } ->
546 Rga.find_last_id arr_rga
547 | _ -> None
548 in
549 Node.insert_arr node ~after ~chunk_id ~value:ref_ts
550 | None -> ())
551 | J.Float span_f ->
552 let span = Float.to_int span_f in
553 let after =
554 match node with
555 | Node.Node_arr { arr_rga; _ } ->
556 Rga.find_last_id arr_rga
557 | _ -> None
558 in
559 Node.insert_arr node ~after ~chunk_id ~value:chunk_id;
560 Node.delete_range node
561 ~spans:
562 [ { sid = chunk_id.sid; time = chunk_id.time; span } ]
563 | J.Int span_i ->
564 let span = Int64.to_int span_i in
565 let after =
566 match node with
567 | Node.Node_arr { arr_rga; _ } ->
568 Rga.find_last_id arr_rga
569 | _ -> None
570 in
571 Node.insert_arr node ~after ~chunk_id ~value:chunk_id;
572 Node.delete_range node
573 ~spans:
574 [ { sid = chunk_id.sid; time = chunk_id.time; span } ]
575 | _ -> ())
576 | J.Array [ J.Int sid_i; J.Int time_i; ref_or_span ] -> (
577 let chunk_id =
578 {
579 Clock.sid = Int64.to_int sid_i;
580 time = Int64.to_int time_i;
581 }
582 in
583 match ref_or_span with
584 | J.String ref_str -> (
585 match ts_of_string ref_str with
586 | Some ref_ts ->
587 let after =
588 match node with
589 | Node.Node_arr { arr_rga; _ } ->
590 Rga.find_last_id arr_rga
591 | _ -> None
592 in
593 Node.insert_arr node ~after ~chunk_id ~value:ref_ts
594 | None -> ())
595 | J.Float span_f ->
596 let span = Float.to_int span_f in
597 let after =
598 match node with
599 | Node.Node_arr { arr_rga; _ } ->
600 Rga.find_last_id arr_rga
601 | _ -> None
602 in
603 Node.insert_arr node ~after ~chunk_id ~value:chunk_id;
604 Node.delete_range node
605 ~spans:
606 [ { sid = chunk_id.sid; time = chunk_id.time; span } ]
607 | J.Int span_i ->
608 let span = Int64.to_int span_i in
609 let after =
610 match node with
611 | Node.Node_arr { arr_rga; _ } ->
612 Rga.find_last_id arr_rga
613 | _ -> None
614 in
615 Node.insert_arr node ~after ~chunk_id ~value:chunk_id;
616 Node.delete_range node
617 ~spans:
618 [ { sid = chunk_id.sid; time = chunk_id.time; span } ]
619 | _ -> ())
620 | _ -> ())
621 chunks
622 | _ -> ())
623 | _ -> ()
624
625let decode (json : J.t) : Model.t option =
626 match json with
627 | J.Object fields -> (
628 match get_member "clock" fields with
629 | Some clock_json -> (
630 match decode_clock clock_json with
631 | Some clock -> (
632 let model = Model.create clock.local.clock_sid in
633 model.clock.local.clock_time <- clock.local.clock_time;
634 model.clock.peers <- clock.peers;
635 let node_jsons = ref [] in
636 (match get_member "nodes" fields with
637 | Some (J.Object node_entries) ->
638 List.iter
639 (fun (key, node_json) ->
640 match ts_of_string key with
641 | Some id -> (
642 match decode_node model node_json id with
643 | Some node ->
644 node_jsons := (node_json, node) :: !node_jsons
645 | None -> ())
646 | None -> ())
647 node_entries
648 | _ -> ());
649 List.iter
650 (fun (json, node) ->
651 link_val_ref model json node;
652 link_obj_entries model json node;
653 link_vec_slots model json node;
654 link_arr_elements model json node)
655 !node_jsons;
656 match get_member "root" fields with
657 | Some (J.String root_str) -> (
658 match ts_of_string root_str with
659 | Some root_ts ->
660 Node.set_val model.root ~value:root_ts;
661 Some model
662 | None -> Some model)
663 | _ -> Some model)
664 | None -> None)
665 | None -> None)
666 | _ -> None
667
668let decode_string s : Model.t option =
669 match Simdjsont.Decode.decode_string Simdjsont.Decode.value s with
670 | Ok json -> decode json
671 | Error e ->
672 Printf.eprintf "JSON parse error: %s\n" e;
673 None
674
675let get_node_json (json : J.t) (ts : Clock.timestamp) : J.t option =
676 match json with
677 | J.Object fields -> (
678 match get_member "nodes" fields with
679 | Some (J.Object node_entries) ->
680 let key = ts_to_string ts in
681 get_member key node_entries
682 | _ -> None)
683 | _ -> None
684
685let get_node_ids (json : J.t) : Clock.timestamp list =
686 match json with
687 | J.Object fields -> (
688 match get_member "nodes" fields with
689 | Some (J.Object node_entries) ->
690 List.filter_map (fun (key, _) -> ts_of_string key) node_entries
691 | _ -> [])
692 | _ -> []