(** Sidecar document codec. The sidecar format separates the JSON view from CRDT metadata: - View: Plain JSON/CBOR that any system can read - Metadata: CRDT info stored separately (node IDs, timestamps, etc.) This is useful for: - Compatibility with non-CRDT systems (they just read the view) - Debugging (human-readable JSON view) - Storage optimization (view can be cached/indexed separately) Format: - view: Plain JSON value (recursive structure) - meta: Object containing: - clock: Array of [sid, time] pairs - paths: Object mapping JSON Pointer paths to node metadata - Each path entry: {id: [sid, time], type: "str"|"obj"|"arr"|etc} *) module J = Simdjsont.Json let escape_component token = let buf = Buffer.create (String.length token) in String.iter (fun c -> match c with | '~' -> Buffer.add_string buf "~0" | '/' -> Buffer.add_string buf "~1" | c -> Buffer.add_char buf c) token; Buffer.contents buf type node_meta = { node_id : Clock.timestamp; node_type : string; chunks : chunk_meta list option; } and chunk_meta = { chunk_id : Clock.timestamp; chunk_span : int; chunk_deleted : bool; } type sidecar = { view : Value.t; clock : Clock.clock_vector; paths : (string * node_meta) list; } let encode_timestamp (ts : Clock.timestamp) : J.t = J.Array [ J.Float (Float.of_int ts.sid); J.Float (Float.of_int ts.time) ] let encode_clock_vector (cv : Clock.clock_vector) : J.t = let local_entry = J.Array [ J.Float (Float.of_int cv.local.clock_sid); J.Float (Float.of_int cv.local.clock_time); ] in let peer_entries = List.map (fun (sid, time) -> J.Array [ J.Float (Float.of_int sid); J.Float (Float.of_int time) ]) cv.peers in J.Array (local_entry :: peer_entries) let encode_chunk_meta (cm : chunk_meta) : J.t = let fields = [ ("id", encode_timestamp cm.chunk_id); ("span", J.Float (Float.of_int cm.chunk_span)); ] in let fields = if cm.chunk_deleted then fields @ [ ("deleted", J.Bool true) ] else fields in J.Object fields let encode_node_meta (nm : node_meta) : J.t = let fields = [ ("id", encode_timestamp nm.node_id); ("type", J.String nm.node_type) ] in let fields = match nm.chunks with | Some chunks -> fields @ [ ("chunks", J.Array (List.map encode_chunk_meta chunks)) ] | None -> fields in J.Object fields let rec collect_meta model (node : Node.t) (path : string) : (string * node_meta) list = let id = Node.id node in let node_type = Node.name node in match node with | Node.Node_con _ -> [ (path, { node_id = id; node_type; chunks = None }) ] | Node.Node_val { val_ref; _ } -> ( match val_ref with | None -> [ (path, { node_id = id; node_type; chunks = None }) ] | Some ref_ts -> ( match Model.get_node model ref_ts with | Some child -> collect_meta model child path | None -> [ (path, { node_id = id; node_type; chunks = None }) ])) | Node.Node_obj { obj_entries; _ } -> let self_meta = (path, { node_id = id; node_type; chunks = None }) in let child_metas = List.concat_map (fun (entry : Node.obj_entry) -> let child_path = path ^ "/" ^ escape_component entry.obj_key in match Model.get_node model entry.obj_value with | Some child -> collect_meta model child child_path | None -> []) obj_entries in self_meta :: child_metas | Node.Node_vec { vec_slots; _ } -> let self_meta = (path, { node_id = id; node_type; chunks = None }) in let child_metas = List.concat_map (fun (slot : Node.vec_slot) -> let child_path = path ^ "/" ^ string_of_int slot.vec_idx in match Model.get_node model slot.vec_value with | Some child -> collect_meta model child child_path | None -> []) vec_slots in self_meta :: child_metas | Node.Node_arr { arr_rga; _ } -> let chunks = Rga.fold (fun acc (chunk : Clock.timestamp Rga.chunk) -> { chunk_id = chunk.id; chunk_span = chunk.span; chunk_deleted = chunk.deleted; } :: acc) [] arr_rga |> List.rev in let self_meta = (path, { node_id = id; node_type; chunks = Some chunks }) in let idx = ref 0 in let child_metas = Rga.fold (fun acc (chunk : Clock.timestamp Rga.chunk) -> if chunk.deleted then acc else begin let child_path = path ^ "/" ^ string_of_int !idx in incr idx; match Model.get_node model chunk.data with | Some child -> acc @ collect_meta model child child_path | None -> acc end) [] arr_rga in self_meta :: child_metas | Node.Node_str { str_rga; _ } -> let chunks = Rga.fold (fun acc (chunk : string Rga.chunk) -> { chunk_id = chunk.id; chunk_span = chunk.span; chunk_deleted = chunk.deleted; } :: acc) [] str_rga |> List.rev in [ (path, { node_id = id; node_type; chunks = Some chunks }) ] | Node.Node_bin { bin_rga; _ } -> let chunks = Rga.fold (fun acc (chunk : bytes Rga.chunk) -> { chunk_id = chunk.id; chunk_span = chunk.span; chunk_deleted = chunk.deleted; } :: acc) [] bin_rga |> List.rev in [ (path, { node_id = id; node_type; chunks = Some chunks }) ] let encode (model : Model.t) : sidecar = let view = Model.view model in let clock = model.clock in let paths = match model.root with | Node.Node_val { val_ref = Some ref_ts; _ } -> ( match Model.get_node model ref_ts with | Some content -> collect_meta model content "" | None -> []) | _ -> [] in { view; clock; paths } let to_json (sidecar : sidecar) : J.t = let view_json = Value_codec.to_json sidecar.view in let clock_json = encode_clock_vector sidecar.clock in let paths_json = J.Object (List.map (fun (path, nm) -> let key = if path = "" then "/" else path in (key, encode_node_meta nm)) sidecar.paths) in J.Object [ ("view", view_json); ("meta", J.Object [ ("clock", clock_json); ("paths", paths_json) ]); ] let encode_string ?(minify = false) model = let sidecar = encode model in let json = to_json sidecar in let _ = minify in J.to_string json let decode_timestamp (json : J.t) : Clock.timestamp option = match json with | J.Array [ J.Float sid_f; J.Float time_f ] -> Some { Clock.sid = Float.to_int sid_f; time = Float.to_int time_f } | J.Array [ J.Int sid_i; J.Int time_i ] -> Some { Clock.sid = Int64.to_int sid_i; time = Int64.to_int time_i } | _ -> None let decode_clock_vector (json : J.t) : Clock.clock_vector option = match json with | J.Array (first :: rest) -> ( match decode_timestamp first with | Some ts -> let local : Clock.logical_clock = { clock_sid = ts.sid; clock_time = ts.time } in let peers = List.filter_map (fun entry -> match decode_timestamp entry with | Some ts -> Some (ts.sid, ts.time) | None -> None) rest in Some { Clock.local; peers } | None -> None) | _ -> None let get_member key (mems : (string * J.t) list) : J.t option = List.find_map (fun (k, v) -> if k = key then Some v else None) mems let decode_chunk_meta (json : J.t) : chunk_meta option = match json with | J.Object fields -> ( match (get_member "id" fields, get_member "span" fields) with | Some id_json, Some (J.Float span_f) -> ( match decode_timestamp id_json with | Some chunk_id -> let deleted = match get_member "deleted" fields with | Some (J.Bool b) -> b | _ -> false in Some { chunk_id; chunk_span = Float.to_int span_f; chunk_deleted = deleted; } | None -> None) | Some id_json, Some (J.Int span_i) -> ( match decode_timestamp id_json with | Some chunk_id -> let deleted = match get_member "deleted" fields with | Some (J.Bool b) -> b | _ -> false in Some { chunk_id; chunk_span = Int64.to_int span_i; chunk_deleted = deleted; } | None -> None) | _ -> None) | _ -> None let decode_node_meta (json : J.t) : node_meta option = match json with | J.Object fields -> ( match (get_member "id" fields, get_member "type" fields) with | Some id_json, Some (J.String node_type) -> ( match decode_timestamp id_json with | Some node_id -> let chunks = match get_member "chunks" fields with | Some (J.Array chunk_jsons) -> Some (List.filter_map decode_chunk_meta chunk_jsons) | _ -> None in Some { node_id; node_type; chunks } | None -> None) | _ -> None) | _ -> None let decode_paths (json : J.t) : (string * node_meta) list = match json with | J.Object entries -> List.filter_map (fun (key, value) -> match decode_node_meta value with | Some nm -> Some ((if key = "/" then "" else key), nm) | None -> None) entries | _ -> [] let from_json (json : J.t) : sidecar option = match json with | J.Object fields -> ( match (get_member "view" fields, get_member "meta" fields) with | Some view_json, Some (J.Object meta_fields) -> ( let view = Value_codec.of_json view_json in match get_member "clock" meta_fields with | Some clock_json -> ( match decode_clock_vector clock_json with | Some clock -> let paths = match get_member "paths" meta_fields with | Some paths_json -> decode_paths paths_json | None -> [] in Some { view; clock; paths } | None -> None) | None -> None) | _ -> None) | _ -> None let decode (sidecar : sidecar) : Model.t = let sid = sidecar.clock.local.clock_sid in let model = Model.create sid in model.clock.local.clock_time <- sidecar.clock.local.clock_time; model.clock.peers <- sidecar.clock.peers; let _root_meta = List.assoc_opt "" sidecar.paths in let rec build_node path (value : Value.t) : Node.t option = let node_meta = List.assoc_opt path sidecar.paths in let id = match node_meta with | Some nm -> nm.node_id | None -> Clock.tick model.clock.local in match value with | Value.Null | Value.Undefined | Value.Bool _ | Value.Int _ | Value.Float _ | Value.String _ | Value.Bytes _ -> let node = Node.make_con ~id ~value in Model.add_node model node; Some node | Value.Object pairs -> let node = Node.make_obj ~id in Model.add_node model node; List.iter (fun (key, child_value) -> let child_path = path ^ "/" ^ escape_component key in match build_node child_path child_value with | Some child_node -> let child_id = Node.id child_node in Node.set_obj_key node ~key ~value:child_id ~write_ts:child_id | None -> ()) pairs; Some node | Value.Array items -> let node = Node.make_arr ~id in Model.add_node model node; List.iteri (fun i child_value -> let child_path = path ^ "/" ^ string_of_int i in match build_node child_path child_value with | Some child_node -> let child_id = Node.id child_node in let chunk_id = Clock.tick model.clock.local in let after = match node with | Node.Node_arr { arr_rga; _ } -> Rga.find_last_id arr_rga | _ -> None in Node.insert_arr node ~after ~chunk_id ~value:child_id | None -> ()) items; Some node | Value.Timestamp_ref (_sid, _time) -> let node = Node.make_con ~id ~value in Model.add_node model node; Some node in (match build_node "" sidecar.view with | Some content_node -> let content_id = Node.id content_node in Node.set_val model.root ~value:content_id | None -> ()); model let decode_string s : Model.t option = match Simdjsont.Decode.decode_string Simdjsont.Decode.value s with | Ok json -> ( match from_json json with | Some sidecar -> Some (decode sidecar) | None -> None) | Error e -> Printf.eprintf "JSON parse error: %s\n" e; None let view_only (model : Model.t) : Value.t = Model.view model let view_only_string ?(minify = false) model = let view = view_only model in let json = Value_codec.to_json view in let _ = minify in J.to_string json