crdt library in ocaml implementing json-joy
at main 14 kB view raw
1(** Sidecar document codec. 2 3 The sidecar format separates the JSON view from CRDT metadata: 4 - View: Plain JSON/CBOR that any system can read 5 - Metadata: CRDT info stored separately (node IDs, timestamps, etc.) 6 7 This is useful for: 8 - Compatibility with non-CRDT systems (they just read the view) 9 - Debugging (human-readable JSON view) 10 - Storage optimization (view can be cached/indexed separately) 11 12 Format: 13 - view: Plain JSON value (recursive structure) 14 - meta: Object containing: 15 - clock: Array of [sid, time] pairs 16 - paths: Object mapping JSON Pointer paths to node metadata 17 - Each path entry: {id: [sid, time], type: "str"|"obj"|"arr"|etc} *) 18 19module J = Simdjsont.Json 20 21let escape_component token = 22 let buf = Buffer.create (String.length token) in 23 String.iter 24 (fun c -> 25 match c with 26 | '~' -> Buffer.add_string buf "~0" 27 | '/' -> Buffer.add_string buf "~1" 28 | c -> Buffer.add_char buf c) 29 token; 30 Buffer.contents buf 31 32type node_meta = { 33 node_id : Clock.timestamp; 34 node_type : string; 35 chunks : chunk_meta list option; 36} 37 38and chunk_meta = { 39 chunk_id : Clock.timestamp; 40 chunk_span : int; 41 chunk_deleted : bool; 42} 43 44type sidecar = { 45 view : Value.t; 46 clock : Clock.clock_vector; 47 paths : (string * node_meta) list; 48} 49 50let encode_timestamp (ts : Clock.timestamp) : J.t = 51 J.Array [ J.Float (Float.of_int ts.sid); J.Float (Float.of_int ts.time) ] 52 53let encode_clock_vector (cv : Clock.clock_vector) : J.t = 54 let local_entry = 55 J.Array 56 [ 57 J.Float (Float.of_int cv.local.clock_sid); 58 J.Float (Float.of_int cv.local.clock_time); 59 ] 60 in 61 let peer_entries = 62 List.map 63 (fun (sid, time) -> 64 J.Array [ J.Float (Float.of_int sid); J.Float (Float.of_int time) ]) 65 cv.peers 66 in 67 J.Array (local_entry :: peer_entries) 68 69let encode_chunk_meta (cm : chunk_meta) : J.t = 70 let fields = 71 [ 72 ("id", encode_timestamp cm.chunk_id); 73 ("span", J.Float (Float.of_int cm.chunk_span)); 74 ] 75 in 76 let fields = 77 if cm.chunk_deleted then fields @ [ ("deleted", J.Bool true) ] else fields 78 in 79 J.Object fields 80 81let encode_node_meta (nm : node_meta) : J.t = 82 let fields = 83 [ ("id", encode_timestamp nm.node_id); ("type", J.String nm.node_type) ] 84 in 85 let fields = 86 match nm.chunks with 87 | Some chunks -> 88 fields @ [ ("chunks", J.Array (List.map encode_chunk_meta chunks)) ] 89 | None -> fields 90 in 91 J.Object fields 92 93let rec collect_meta model (node : Node.t) (path : string) : 94 (string * node_meta) list = 95 let id = Node.id node in 96 let node_type = Node.name node in 97 match node with 98 | Node.Node_con _ -> [ (path, { node_id = id; node_type; chunks = None }) ] 99 | Node.Node_val { val_ref; _ } -> ( 100 match val_ref with 101 | None -> [ (path, { node_id = id; node_type; chunks = None }) ] 102 | Some ref_ts -> ( 103 match Model.get_node model ref_ts with 104 | Some child -> collect_meta model child path 105 | None -> [ (path, { node_id = id; node_type; chunks = None }) ])) 106 | Node.Node_obj { obj_entries; _ } -> 107 let self_meta = (path, { node_id = id; node_type; chunks = None }) in 108 let child_metas = 109 List.concat_map 110 (fun (entry : Node.obj_entry) -> 111 let child_path = path ^ "/" ^ escape_component entry.obj_key in 112 match Model.get_node model entry.obj_value with 113 | Some child -> collect_meta model child child_path 114 | None -> []) 115 obj_entries 116 in 117 self_meta :: child_metas 118 | Node.Node_vec { vec_slots; _ } -> 119 let self_meta = (path, { node_id = id; node_type; chunks = None }) in 120 let child_metas = 121 List.concat_map 122 (fun (slot : Node.vec_slot) -> 123 let child_path = path ^ "/" ^ string_of_int slot.vec_idx in 124 match Model.get_node model slot.vec_value with 125 | Some child -> collect_meta model child child_path 126 | None -> []) 127 vec_slots 128 in 129 self_meta :: child_metas 130 | Node.Node_arr { arr_rga; _ } -> 131 let chunks = 132 Rga.fold 133 (fun acc (chunk : Clock.timestamp Rga.chunk) -> 134 { 135 chunk_id = chunk.id; 136 chunk_span = chunk.span; 137 chunk_deleted = chunk.deleted; 138 } 139 :: acc) 140 [] arr_rga 141 |> List.rev 142 in 143 let self_meta = 144 (path, { node_id = id; node_type; chunks = Some chunks }) 145 in 146 let idx = ref 0 in 147 let child_metas = 148 Rga.fold 149 (fun acc (chunk : Clock.timestamp Rga.chunk) -> 150 if chunk.deleted then acc 151 else begin 152 let child_path = path ^ "/" ^ string_of_int !idx in 153 incr idx; 154 match Model.get_node model chunk.data with 155 | Some child -> acc @ collect_meta model child child_path 156 | None -> acc 157 end) 158 [] arr_rga 159 in 160 self_meta :: child_metas 161 | Node.Node_str { str_rga; _ } -> 162 let chunks = 163 Rga.fold 164 (fun acc (chunk : string Rga.chunk) -> 165 { 166 chunk_id = chunk.id; 167 chunk_span = chunk.span; 168 chunk_deleted = chunk.deleted; 169 } 170 :: acc) 171 [] str_rga 172 |> List.rev 173 in 174 [ (path, { node_id = id; node_type; chunks = Some chunks }) ] 175 | Node.Node_bin { bin_rga; _ } -> 176 let chunks = 177 Rga.fold 178 (fun acc (chunk : bytes Rga.chunk) -> 179 { 180 chunk_id = chunk.id; 181 chunk_span = chunk.span; 182 chunk_deleted = chunk.deleted; 183 } 184 :: acc) 185 [] bin_rga 186 |> List.rev 187 in 188 [ (path, { node_id = id; node_type; chunks = Some chunks }) ] 189 190let encode (model : Model.t) : sidecar = 191 let view = Model.view model in 192 let clock = model.clock in 193 let paths = 194 match model.root with 195 | Node.Node_val { val_ref = Some ref_ts; _ } -> ( 196 match Model.get_node model ref_ts with 197 | Some content -> collect_meta model content "" 198 | None -> []) 199 | _ -> [] 200 in 201 { view; clock; paths } 202 203let to_json (sidecar : sidecar) : J.t = 204 let view_json = Value_codec.to_json sidecar.view in 205 let clock_json = encode_clock_vector sidecar.clock in 206 let paths_json = 207 J.Object 208 (List.map 209 (fun (path, nm) -> 210 let key = if path = "" then "/" else path in 211 (key, encode_node_meta nm)) 212 sidecar.paths) 213 in 214 J.Object 215 [ 216 ("view", view_json); 217 ("meta", J.Object [ ("clock", clock_json); ("paths", paths_json) ]); 218 ] 219 220let encode_string ?(minify = false) model = 221 let sidecar = encode model in 222 let json = to_json sidecar in 223 let _ = minify in 224 J.to_string json 225 226let decode_timestamp (json : J.t) : Clock.timestamp option = 227 match json with 228 | J.Array [ J.Float sid_f; J.Float time_f ] -> 229 Some { Clock.sid = Float.to_int sid_f; time = Float.to_int time_f } 230 | J.Array [ J.Int sid_i; J.Int time_i ] -> 231 Some { Clock.sid = Int64.to_int sid_i; time = Int64.to_int time_i } 232 | _ -> None 233 234let decode_clock_vector (json : J.t) : Clock.clock_vector option = 235 match json with 236 | J.Array (first :: rest) -> ( 237 match decode_timestamp first with 238 | Some ts -> 239 let local : Clock.logical_clock = 240 { clock_sid = ts.sid; clock_time = ts.time } 241 in 242 let peers = 243 List.filter_map 244 (fun entry -> 245 match decode_timestamp entry with 246 | Some ts -> Some (ts.sid, ts.time) 247 | None -> None) 248 rest 249 in 250 Some { Clock.local; peers } 251 | None -> None) 252 | _ -> None 253 254let get_member key (mems : (string * J.t) list) : J.t option = 255 List.find_map (fun (k, v) -> if k = key then Some v else None) mems 256 257let decode_chunk_meta (json : J.t) : chunk_meta option = 258 match json with 259 | J.Object fields -> ( 260 match (get_member "id" fields, get_member "span" fields) with 261 | Some id_json, Some (J.Float span_f) -> ( 262 match decode_timestamp id_json with 263 | Some chunk_id -> 264 let deleted = 265 match get_member "deleted" fields with 266 | Some (J.Bool b) -> b 267 | _ -> false 268 in 269 Some 270 { 271 chunk_id; 272 chunk_span = Float.to_int span_f; 273 chunk_deleted = deleted; 274 } 275 | None -> None) 276 | Some id_json, Some (J.Int span_i) -> ( 277 match decode_timestamp id_json with 278 | Some chunk_id -> 279 let deleted = 280 match get_member "deleted" fields with 281 | Some (J.Bool b) -> b 282 | _ -> false 283 in 284 Some 285 { 286 chunk_id; 287 chunk_span = Int64.to_int span_i; 288 chunk_deleted = deleted; 289 } 290 | None -> None) 291 | _ -> None) 292 | _ -> None 293 294let decode_node_meta (json : J.t) : node_meta option = 295 match json with 296 | J.Object fields -> ( 297 match (get_member "id" fields, get_member "type" fields) with 298 | Some id_json, Some (J.String node_type) -> ( 299 match decode_timestamp id_json with 300 | Some node_id -> 301 let chunks = 302 match get_member "chunks" fields with 303 | Some (J.Array chunk_jsons) -> 304 Some (List.filter_map decode_chunk_meta chunk_jsons) 305 | _ -> None 306 in 307 Some { node_id; node_type; chunks } 308 | None -> None) 309 | _ -> None) 310 | _ -> None 311 312let decode_paths (json : J.t) : (string * node_meta) list = 313 match json with 314 | J.Object entries -> 315 List.filter_map 316 (fun (key, value) -> 317 match decode_node_meta value with 318 | Some nm -> Some ((if key = "/" then "" else key), nm) 319 | None -> None) 320 entries 321 | _ -> [] 322 323let from_json (json : J.t) : sidecar option = 324 match json with 325 | J.Object fields -> ( 326 match (get_member "view" fields, get_member "meta" fields) with 327 | Some view_json, Some (J.Object meta_fields) -> ( 328 let view = Value_codec.of_json view_json in 329 match get_member "clock" meta_fields with 330 | Some clock_json -> ( 331 match decode_clock_vector clock_json with 332 | Some clock -> 333 let paths = 334 match get_member "paths" meta_fields with 335 | Some paths_json -> decode_paths paths_json 336 | None -> [] 337 in 338 Some { view; clock; paths } 339 | None -> None) 340 | None -> None) 341 | _ -> None) 342 | _ -> None 343 344let decode (sidecar : sidecar) : Model.t = 345 let sid = sidecar.clock.local.clock_sid in 346 let model = Model.create sid in 347 model.clock.local.clock_time <- sidecar.clock.local.clock_time; 348 model.clock.peers <- sidecar.clock.peers; 349 350 let _root_meta = List.assoc_opt "" sidecar.paths in 351 352 let rec build_node path (value : Value.t) : Node.t option = 353 let node_meta = List.assoc_opt path sidecar.paths in 354 let id = 355 match node_meta with 356 | Some nm -> nm.node_id 357 | None -> Clock.tick model.clock.local 358 in 359 match value with 360 | Value.Null | Value.Undefined | Value.Bool _ | Value.Int _ | Value.Float _ 361 | Value.String _ | Value.Bytes _ -> 362 let node = Node.make_con ~id ~value in 363 Model.add_node model node; 364 Some node 365 | Value.Object pairs -> 366 let node = Node.make_obj ~id in 367 Model.add_node model node; 368 List.iter 369 (fun (key, child_value) -> 370 let child_path = path ^ "/" ^ escape_component key in 371 match build_node child_path child_value with 372 | Some child_node -> 373 let child_id = Node.id child_node in 374 Node.set_obj_key node ~key ~value:child_id ~write_ts:child_id 375 | None -> ()) 376 pairs; 377 Some node 378 | Value.Array items -> 379 let node = Node.make_arr ~id in 380 Model.add_node model node; 381 List.iteri 382 (fun i child_value -> 383 let child_path = path ^ "/" ^ string_of_int i in 384 match build_node child_path child_value with 385 | Some child_node -> 386 let child_id = Node.id child_node in 387 let chunk_id = Clock.tick model.clock.local in 388 let after = 389 match node with 390 | Node.Node_arr { arr_rga; _ } -> Rga.find_last_id arr_rga 391 | _ -> None 392 in 393 Node.insert_arr node ~after ~chunk_id ~value:child_id 394 | None -> ()) 395 items; 396 Some node 397 | Value.Timestamp_ref (_sid, _time) -> 398 let node = Node.make_con ~id ~value in 399 Model.add_node model node; 400 Some node 401 in 402 403 (match build_node "" sidecar.view with 404 | Some content_node -> 405 let content_id = Node.id content_node in 406 Node.set_val model.root ~value:content_id 407 | None -> ()); 408 409 model 410 411let decode_string s : Model.t option = 412 match Simdjsont.Decode.decode_string Simdjsont.Decode.value s with 413 | Ok json -> ( 414 match from_json json with 415 | Some sidecar -> Some (decode sidecar) 416 | None -> None) 417 | Error e -> 418 Printf.eprintf "JSON parse error: %s\n" e; 419 None 420 421let view_only (model : Model.t) : Value.t = Model.view model 422 423let view_only_string ?(minify = false) model = 424 let view = view_only model in 425 let json = Value_codec.to_json view in 426 let _ = minify in 427 J.to_string json