crdt library in ocaml implementing json-joy
1(** Sidecar document codec.
2
3 The sidecar format separates the JSON view from CRDT metadata:
4 - View: Plain JSON/CBOR that any system can read
5 - Metadata: CRDT info stored separately (node IDs, timestamps, etc.)
6
7 This is useful for:
8 - Compatibility with non-CRDT systems (they just read the view)
9 - Debugging (human-readable JSON view)
10 - Storage optimization (view can be cached/indexed separately)
11
12 Format:
13 - view: Plain JSON value (recursive structure)
14 - meta: Object containing:
15 - clock: Array of [sid, time] pairs
16 - paths: Object mapping JSON Pointer paths to node metadata
17 - Each path entry: {id: [sid, time], type: "str"|"obj"|"arr"|etc} *)
18
19module J = Simdjsont.Json
20
21let escape_component token =
22 let buf = Buffer.create (String.length token) in
23 String.iter
24 (fun c ->
25 match c with
26 | '~' -> Buffer.add_string buf "~0"
27 | '/' -> Buffer.add_string buf "~1"
28 | c -> Buffer.add_char buf c)
29 token;
30 Buffer.contents buf
31
32type node_meta = {
33 node_id : Clock.timestamp;
34 node_type : string;
35 chunks : chunk_meta list option;
36}
37
38and chunk_meta = {
39 chunk_id : Clock.timestamp;
40 chunk_span : int;
41 chunk_deleted : bool;
42}
43
44type sidecar = {
45 view : Value.t;
46 clock : Clock.clock_vector;
47 paths : (string * node_meta) list;
48}
49
50let encode_timestamp (ts : Clock.timestamp) : J.t =
51 J.Array [ J.Float (Float.of_int ts.sid); J.Float (Float.of_int ts.time) ]
52
53let encode_clock_vector (cv : Clock.clock_vector) : J.t =
54 let local_entry =
55 J.Array
56 [
57 J.Float (Float.of_int cv.local.clock_sid);
58 J.Float (Float.of_int cv.local.clock_time);
59 ]
60 in
61 let peer_entries =
62 List.map
63 (fun (sid, time) ->
64 J.Array [ J.Float (Float.of_int sid); J.Float (Float.of_int time) ])
65 cv.peers
66 in
67 J.Array (local_entry :: peer_entries)
68
69let encode_chunk_meta (cm : chunk_meta) : J.t =
70 let fields =
71 [
72 ("id", encode_timestamp cm.chunk_id);
73 ("span", J.Float (Float.of_int cm.chunk_span));
74 ]
75 in
76 let fields =
77 if cm.chunk_deleted then fields @ [ ("deleted", J.Bool true) ] else fields
78 in
79 J.Object fields
80
81let encode_node_meta (nm : node_meta) : J.t =
82 let fields =
83 [ ("id", encode_timestamp nm.node_id); ("type", J.String nm.node_type) ]
84 in
85 let fields =
86 match nm.chunks with
87 | Some chunks ->
88 fields @ [ ("chunks", J.Array (List.map encode_chunk_meta chunks)) ]
89 | None -> fields
90 in
91 J.Object fields
92
93let rec collect_meta model (node : Node.t) (path : string) :
94 (string * node_meta) list =
95 let id = Node.id node in
96 let node_type = Node.name node in
97 match node with
98 | Node.Node_con _ -> [ (path, { node_id = id; node_type; chunks = None }) ]
99 | Node.Node_val { val_ref; _ } -> (
100 match val_ref with
101 | None -> [ (path, { node_id = id; node_type; chunks = None }) ]
102 | Some ref_ts -> (
103 match Model.get_node model ref_ts with
104 | Some child -> collect_meta model child path
105 | None -> [ (path, { node_id = id; node_type; chunks = None }) ]))
106 | Node.Node_obj { obj_entries; _ } ->
107 let self_meta = (path, { node_id = id; node_type; chunks = None }) in
108 let child_metas =
109 List.concat_map
110 (fun (entry : Node.obj_entry) ->
111 let child_path = path ^ "/" ^ escape_component entry.obj_key in
112 match Model.get_node model entry.obj_value with
113 | Some child -> collect_meta model child child_path
114 | None -> [])
115 obj_entries
116 in
117 self_meta :: child_metas
118 | Node.Node_vec { vec_slots; _ } ->
119 let self_meta = (path, { node_id = id; node_type; chunks = None }) in
120 let child_metas =
121 List.concat_map
122 (fun (slot : Node.vec_slot) ->
123 let child_path = path ^ "/" ^ string_of_int slot.vec_idx in
124 match Model.get_node model slot.vec_value with
125 | Some child -> collect_meta model child child_path
126 | None -> [])
127 vec_slots
128 in
129 self_meta :: child_metas
130 | Node.Node_arr { arr_rga; _ } ->
131 let chunks =
132 Rga.fold
133 (fun acc (chunk : Clock.timestamp Rga.chunk) ->
134 {
135 chunk_id = chunk.id;
136 chunk_span = chunk.span;
137 chunk_deleted = chunk.deleted;
138 }
139 :: acc)
140 [] arr_rga
141 |> List.rev
142 in
143 let self_meta =
144 (path, { node_id = id; node_type; chunks = Some chunks })
145 in
146 let idx = ref 0 in
147 let child_metas =
148 Rga.fold
149 (fun acc (chunk : Clock.timestamp Rga.chunk) ->
150 if chunk.deleted then acc
151 else begin
152 let child_path = path ^ "/" ^ string_of_int !idx in
153 incr idx;
154 match Model.get_node model chunk.data with
155 | Some child -> acc @ collect_meta model child child_path
156 | None -> acc
157 end)
158 [] arr_rga
159 in
160 self_meta :: child_metas
161 | Node.Node_str { str_rga; _ } ->
162 let chunks =
163 Rga.fold
164 (fun acc (chunk : string Rga.chunk) ->
165 {
166 chunk_id = chunk.id;
167 chunk_span = chunk.span;
168 chunk_deleted = chunk.deleted;
169 }
170 :: acc)
171 [] str_rga
172 |> List.rev
173 in
174 [ (path, { node_id = id; node_type; chunks = Some chunks }) ]
175 | Node.Node_bin { bin_rga; _ } ->
176 let chunks =
177 Rga.fold
178 (fun acc (chunk : bytes Rga.chunk) ->
179 {
180 chunk_id = chunk.id;
181 chunk_span = chunk.span;
182 chunk_deleted = chunk.deleted;
183 }
184 :: acc)
185 [] bin_rga
186 |> List.rev
187 in
188 [ (path, { node_id = id; node_type; chunks = Some chunks }) ]
189
190let encode (model : Model.t) : sidecar =
191 let view = Model.view model in
192 let clock = model.clock in
193 let paths =
194 match model.root with
195 | Node.Node_val { val_ref = Some ref_ts; _ } -> (
196 match Model.get_node model ref_ts with
197 | Some content -> collect_meta model content ""
198 | None -> [])
199 | _ -> []
200 in
201 { view; clock; paths }
202
203let to_json (sidecar : sidecar) : J.t =
204 let view_json = Value_codec.to_json sidecar.view in
205 let clock_json = encode_clock_vector sidecar.clock in
206 let paths_json =
207 J.Object
208 (List.map
209 (fun (path, nm) ->
210 let key = if path = "" then "/" else path in
211 (key, encode_node_meta nm))
212 sidecar.paths)
213 in
214 J.Object
215 [
216 ("view", view_json);
217 ("meta", J.Object [ ("clock", clock_json); ("paths", paths_json) ]);
218 ]
219
220let encode_string ?(minify = false) model =
221 let sidecar = encode model in
222 let json = to_json sidecar in
223 let _ = minify in
224 J.to_string json
225
226let decode_timestamp (json : J.t) : Clock.timestamp option =
227 match json with
228 | J.Array [ J.Float sid_f; J.Float time_f ] ->
229 Some { Clock.sid = Float.to_int sid_f; time = Float.to_int time_f }
230 | J.Array [ J.Int sid_i; J.Int time_i ] ->
231 Some { Clock.sid = Int64.to_int sid_i; time = Int64.to_int time_i }
232 | _ -> None
233
234let decode_clock_vector (json : J.t) : Clock.clock_vector option =
235 match json with
236 | J.Array (first :: rest) -> (
237 match decode_timestamp first with
238 | Some ts ->
239 let local : Clock.logical_clock =
240 { clock_sid = ts.sid; clock_time = ts.time }
241 in
242 let peers =
243 List.filter_map
244 (fun entry ->
245 match decode_timestamp entry with
246 | Some ts -> Some (ts.sid, ts.time)
247 | None -> None)
248 rest
249 in
250 Some { Clock.local; peers }
251 | None -> None)
252 | _ -> None
253
254let get_member key (mems : (string * J.t) list) : J.t option =
255 List.find_map (fun (k, v) -> if k = key then Some v else None) mems
256
257let decode_chunk_meta (json : J.t) : chunk_meta option =
258 match json with
259 | J.Object fields -> (
260 match (get_member "id" fields, get_member "span" fields) with
261 | Some id_json, Some (J.Float span_f) -> (
262 match decode_timestamp id_json with
263 | Some chunk_id ->
264 let deleted =
265 match get_member "deleted" fields with
266 | Some (J.Bool b) -> b
267 | _ -> false
268 in
269 Some
270 {
271 chunk_id;
272 chunk_span = Float.to_int span_f;
273 chunk_deleted = deleted;
274 }
275 | None -> None)
276 | Some id_json, Some (J.Int span_i) -> (
277 match decode_timestamp id_json with
278 | Some chunk_id ->
279 let deleted =
280 match get_member "deleted" fields with
281 | Some (J.Bool b) -> b
282 | _ -> false
283 in
284 Some
285 {
286 chunk_id;
287 chunk_span = Int64.to_int span_i;
288 chunk_deleted = deleted;
289 }
290 | None -> None)
291 | _ -> None)
292 | _ -> None
293
294let decode_node_meta (json : J.t) : node_meta option =
295 match json with
296 | J.Object fields -> (
297 match (get_member "id" fields, get_member "type" fields) with
298 | Some id_json, Some (J.String node_type) -> (
299 match decode_timestamp id_json with
300 | Some node_id ->
301 let chunks =
302 match get_member "chunks" fields with
303 | Some (J.Array chunk_jsons) ->
304 Some (List.filter_map decode_chunk_meta chunk_jsons)
305 | _ -> None
306 in
307 Some { node_id; node_type; chunks }
308 | None -> None)
309 | _ -> None)
310 | _ -> None
311
312let decode_paths (json : J.t) : (string * node_meta) list =
313 match json with
314 | J.Object entries ->
315 List.filter_map
316 (fun (key, value) ->
317 match decode_node_meta value with
318 | Some nm -> Some ((if key = "/" then "" else key), nm)
319 | None -> None)
320 entries
321 | _ -> []
322
323let from_json (json : J.t) : sidecar option =
324 match json with
325 | J.Object fields -> (
326 match (get_member "view" fields, get_member "meta" fields) with
327 | Some view_json, Some (J.Object meta_fields) -> (
328 let view = Value_codec.of_json view_json in
329 match get_member "clock" meta_fields with
330 | Some clock_json -> (
331 match decode_clock_vector clock_json with
332 | Some clock ->
333 let paths =
334 match get_member "paths" meta_fields with
335 | Some paths_json -> decode_paths paths_json
336 | None -> []
337 in
338 Some { view; clock; paths }
339 | None -> None)
340 | None -> None)
341 | _ -> None)
342 | _ -> None
343
344let decode (sidecar : sidecar) : Model.t =
345 let sid = sidecar.clock.local.clock_sid in
346 let model = Model.create sid in
347 model.clock.local.clock_time <- sidecar.clock.local.clock_time;
348 model.clock.peers <- sidecar.clock.peers;
349
350 let _root_meta = List.assoc_opt "" sidecar.paths in
351
352 let rec build_node path (value : Value.t) : Node.t option =
353 let node_meta = List.assoc_opt path sidecar.paths in
354 let id =
355 match node_meta with
356 | Some nm -> nm.node_id
357 | None -> Clock.tick model.clock.local
358 in
359 match value with
360 | Value.Null | Value.Undefined | Value.Bool _ | Value.Int _ | Value.Float _
361 | Value.String _ | Value.Bytes _ ->
362 let node = Node.make_con ~id ~value in
363 Model.add_node model node;
364 Some node
365 | Value.Object pairs ->
366 let node = Node.make_obj ~id in
367 Model.add_node model node;
368 List.iter
369 (fun (key, child_value) ->
370 let child_path = path ^ "/" ^ escape_component key in
371 match build_node child_path child_value with
372 | Some child_node ->
373 let child_id = Node.id child_node in
374 Node.set_obj_key node ~key ~value:child_id ~write_ts:child_id
375 | None -> ())
376 pairs;
377 Some node
378 | Value.Array items ->
379 let node = Node.make_arr ~id in
380 Model.add_node model node;
381 List.iteri
382 (fun i child_value ->
383 let child_path = path ^ "/" ^ string_of_int i in
384 match build_node child_path child_value with
385 | Some child_node ->
386 let child_id = Node.id child_node in
387 let chunk_id = Clock.tick model.clock.local in
388 let after =
389 match node with
390 | Node.Node_arr { arr_rga; _ } -> Rga.find_last_id arr_rga
391 | _ -> None
392 in
393 Node.insert_arr node ~after ~chunk_id ~value:child_id
394 | None -> ())
395 items;
396 Some node
397 | Value.Timestamp_ref (_sid, _time) ->
398 let node = Node.make_con ~id ~value in
399 Model.add_node model node;
400 Some node
401 in
402
403 (match build_node "" sidecar.view with
404 | Some content_node ->
405 let content_id = Node.id content_node in
406 Node.set_val model.root ~value:content_id
407 | None -> ());
408
409 model
410
411let decode_string s : Model.t option =
412 match Simdjsont.Decode.decode_string Simdjsont.Decode.value s with
413 | Ok json -> (
414 match from_json json with
415 | Some sidecar -> Some (decode sidecar)
416 | None -> None)
417 | Error e ->
418 Printf.eprintf "JSON parse error: %s\n" e;
419 None
420
421let view_only (model : Model.t) : Value.t = Model.view model
422
423let view_only_string ?(minify = false) model =
424 let view = view_only model in
425 let json = Value_codec.to_json view in
426 let _ = minify in
427 J.to_string json