crdt library in ocaml implementing json-joy
1(** CBOR and binary encoding/decoding primitives.
2
3 This module provides low-level encoding and decoding functions for:
4 - CBOR (RFC 7049) value encoding/decoding
5 - Variable-length integers (vu57, b1vu56)
6 - Compact ID encoding for CRDT timestamps
7
8 The implementation is optimized for performance with:
9 - Inline annotations on hot paths
10 - Unsafe byte operations where safe
11 - Pre-allocated buffers with growth strategy
12 - Batched capacity checks *)
13
14(** {1 Encoder} *)
15
16type encoder = { mutable buf : bytes; mutable pos : int; mutable cap : int }
17(** Mutable encoder with auto-growing buffer *)
18
19(** Create a new encoder with optional initial capacity (default 4096) *)
20let create_encoder ?(capacity = 4096) () =
21 { buf = Bytes.create capacity; pos = 0; cap = capacity }
22
23(** Reset encoder position to reuse buffer *)
24let reset_encoder enc = enc.pos <- 0
25
26(** Grow the encoder buffer to accommodate at least [min_additional] more bytes
27*)
28let grow enc min_additional =
29 let new_cap = max (enc.cap * 2) (enc.pos + min_additional) in
30 let new_buf = Bytes.create new_cap in
31 Bytes.blit enc.buf 0 new_buf 0 enc.pos;
32 enc.buf <- new_buf;
33 enc.cap <- new_cap
34
35(** Ensure capacity for n more bytes *)
36let[@inline] ensure_capacity enc n = if enc.pos + n > enc.cap then grow enc n
37
38(** Get the encoded contents as bytes *)
39let encoder_contents enc = Bytes.sub enc.buf 0 enc.pos
40
41(** Get the encoded contents as string *)
42let encoder_contents_string enc = Bytes.sub_string enc.buf 0 enc.pos
43
44(** {2 Primitive Writers} *)
45
46(** Write a single byte *)
47let[@inline] write_u8 enc b =
48 if enc.pos >= enc.cap then grow enc 16;
49 Bytes.unsafe_set enc.buf enc.pos (Char.chr b);
50 enc.pos <- enc.pos + 1
51
52(** Write a 32-bit unsigned integer in big-endian *)
53let[@inline] write_u32 enc n =
54 if enc.pos + 4 > enc.cap then grow enc 16;
55 Bytes.set_int32_be enc.buf enc.pos (Int32.of_int n);
56 enc.pos <- enc.pos + 4
57
58(** Set a 32-bit value at a specific offset (for backpatching) *)
59let[@inline] set_u32 enc offset n =
60 Bytes.set_int32_be enc.buf offset (Int32.of_int n)
61
62(** Write raw bytes *)
63let write_bytes enc data =
64 let len = Bytes.length data in
65 if enc.pos + len > enc.cap then grow enc len;
66 Bytes.blit data 0 enc.buf enc.pos len;
67 enc.pos <- enc.pos + len
68
69(** Write a string as raw bytes *)
70let write_string enc s =
71 let len = String.length s in
72 if enc.pos + len > enc.cap then grow enc len;
73 Bytes.blit_string s 0 enc.buf enc.pos len;
74 enc.pos <- enc.pos + len
75
76(** {2 Variable-Length Integer Encoding} *)
77
78(** Encode variable-length unsigned integer up to 57 bits (8 bytes max).
79
80 Format: each byte uses 7 bits for value, high bit indicates continuation.
81 {v
82 byte 1 byte 2 ... byte 8
83 ?zzzzzzz ?zzzzzzz zzzzzzzz
84 v}
85 where ? = 1 means more bytes follow, ? = 0 means last byte *)
86let[@inline] write_vu57 enc num =
87 if enc.pos + 10 > enc.cap then grow enc 16;
88 let buf = enc.buf in
89 let rec write_loop pos n =
90 if n <= 0x7f then begin
91 Bytes.unsafe_set buf pos (Char.chr n);
92 pos + 1
93 end
94 else begin
95 Bytes.unsafe_set buf pos (Char.chr (0x80 lor (n land 0x7f)));
96 write_loop (pos + 1) (n lsr 7)
97 end
98 in
99 enc.pos <- write_loop enc.pos num
100
101(** Encode 1-bit flag + up to 56-bit value.
102
103 Format: first byte uses 6 bits for value, then continuation like vu57.
104 {v
105 byte 1 byte 2 ...
106 f?zzzzzz ?zzzzzzz
107 v}
108 where f = flag bit, ? = continuation bit *)
109let[@inline] write_b1vu56 enc flag num =
110 if enc.pos + 10 > enc.cap then grow enc 16;
111 let buf = enc.buf in
112 let flag_bit = if flag then 0x80 else 0 in
113 if num <= 0x3f then begin
114 Bytes.unsafe_set buf enc.pos (Char.chr (flag_bit lor num));
115 enc.pos <- enc.pos + 1
116 end
117 else begin
118 Bytes.unsafe_set buf enc.pos
119 (Char.chr (flag_bit lor 0x40 lor (num land 0x3f)));
120 let rec write_rest pos n =
121 if n <= 0x7f then begin
122 Bytes.unsafe_set buf pos (Char.chr n);
123 pos + 1
124 end
125 else begin
126 Bytes.unsafe_set buf pos (Char.chr (0x80 lor (n land 0x7f)));
127 write_rest (pos + 1) (n lsr 7)
128 end
129 in
130 enc.pos <- write_rest (enc.pos + 1) (num lsr 6)
131 end
132
133(** Write CRDT ID in compact form.
134
135 If session_index <= 7 and time_diff <= 15: single byte (0xxxyyyy) Otherwise:
136 b1vu56(1, session_index) + vu57(time_diff) *)
137let[@inline] write_id enc x y =
138 if x <= 7 && y <= 15 then begin
139 if enc.pos >= enc.cap then grow enc 16;
140 Bytes.unsafe_set enc.buf enc.pos (Char.chr ((x lsl 4) lor y));
141 enc.pos <- enc.pos + 1
142 end
143 else begin
144 write_b1vu56 enc true x;
145 write_vu57 enc y
146 end
147
148(** {2 CBOR Encoding} *)
149
150(** Write CBOR unsigned integer (major type 0) *)
151let[@inline] write_cbor_uint enc n =
152 if enc.pos + 9 > enc.cap then grow enc 16;
153 let buf = enc.buf in
154 let pos = enc.pos in
155 if n <= 23 then begin
156 Bytes.unsafe_set buf pos (Char.chr n);
157 enc.pos <- pos + 1
158 end
159 else if n <= 0xff then begin
160 Bytes.unsafe_set buf pos (Char.chr 0x18);
161 Bytes.unsafe_set buf (pos + 1) (Char.chr n);
162 enc.pos <- pos + 2
163 end
164 else if n <= 0xffff then begin
165 Bytes.unsafe_set buf pos (Char.chr 0x19);
166 Bytes.unsafe_set buf (pos + 1) (Char.chr ((n lsr 8) land 0xff));
167 Bytes.unsafe_set buf (pos + 2) (Char.chr (n land 0xff));
168 enc.pos <- pos + 3
169 end
170 else if n <= 0xffffffff then begin
171 Bytes.unsafe_set buf pos (Char.chr 0x1a);
172 Bytes.unsafe_set buf (pos + 1) (Char.chr ((n lsr 24) land 0xff));
173 Bytes.unsafe_set buf (pos + 2) (Char.chr ((n lsr 16) land 0xff));
174 Bytes.unsafe_set buf (pos + 3) (Char.chr ((n lsr 8) land 0xff));
175 Bytes.unsafe_set buf (pos + 4) (Char.chr (n land 0xff));
176 enc.pos <- pos + 5
177 end
178 else begin
179 Bytes.unsafe_set buf pos (Char.chr 0x1b);
180 Bytes.unsafe_set buf (pos + 1) (Char.chr ((n lsr 56) land 0xff));
181 Bytes.unsafe_set buf (pos + 2) (Char.chr ((n lsr 48) land 0xff));
182 Bytes.unsafe_set buf (pos + 3) (Char.chr ((n lsr 40) land 0xff));
183 Bytes.unsafe_set buf (pos + 4) (Char.chr ((n lsr 32) land 0xff));
184 Bytes.unsafe_set buf (pos + 5) (Char.chr ((n lsr 24) land 0xff));
185 Bytes.unsafe_set buf (pos + 6) (Char.chr ((n lsr 16) land 0xff));
186 Bytes.unsafe_set buf (pos + 7) (Char.chr ((n lsr 8) land 0xff));
187 Bytes.unsafe_set buf (pos + 8) (Char.chr (n land 0xff));
188 enc.pos <- pos + 9
189 end
190
191(** Write CBOR negative integer (major type 1) *)
192let write_cbor_negint enc n =
193 let encoded = -1 - n in
194 if encoded <= 23 then write_u8 enc (0x20 lor encoded)
195 else if encoded <= 0xff then begin
196 write_u8 enc 0x38;
197 write_u8 enc encoded
198 end
199 else if encoded <= 0xffff then begin
200 write_u8 enc 0x39;
201 write_u8 enc ((encoded lsr 8) land 0xff);
202 write_u8 enc (encoded land 0xff)
203 end
204 else if encoded <= 0xffffffff then begin
205 write_u8 enc 0x3a;
206 write_u8 enc ((encoded lsr 24) land 0xff);
207 write_u8 enc ((encoded lsr 16) land 0xff);
208 write_u8 enc ((encoded lsr 8) land 0xff);
209 write_u8 enc (encoded land 0xff)
210 end
211 else begin
212 write_u8 enc 0x3b;
213 write_u8 enc ((encoded lsr 56) land 0xff);
214 write_u8 enc ((encoded lsr 48) land 0xff);
215 write_u8 enc ((encoded lsr 40) land 0xff);
216 write_u8 enc ((encoded lsr 32) land 0xff);
217 write_u8 enc ((encoded lsr 24) land 0xff);
218 write_u8 enc ((encoded lsr 16) land 0xff);
219 write_u8 enc ((encoded lsr 8) land 0xff);
220 write_u8 enc (encoded land 0xff)
221 end
222
223(** Write CBOR integer (chooses unsigned or negative encoding) *)
224let[@inline] write_cbor_int enc n =
225 if n >= 0 then write_cbor_uint enc n else write_cbor_negint enc n
226
227(** Write CBOR text string (major type 3) *)
228let[@inline] write_cbor_string enc s =
229 let len = String.length s in
230 if enc.pos + len + 5 > enc.cap then grow enc (len + 8);
231 let buf = enc.buf in
232 let pos = enc.pos in
233 let header_len =
234 if len <= 23 then begin
235 Bytes.unsafe_set buf pos (Char.chr (0x60 lor len));
236 1
237 end
238 else if len <= 0xff then begin
239 Bytes.unsafe_set buf pos (Char.chr 0x78);
240 Bytes.unsafe_set buf (pos + 1) (Char.chr len);
241 2
242 end
243 else if len <= 0xffff then begin
244 Bytes.unsafe_set buf pos (Char.chr 0x79);
245 Bytes.unsafe_set buf (pos + 1) (Char.chr ((len lsr 8) land 0xff));
246 Bytes.unsafe_set buf (pos + 2) (Char.chr (len land 0xff));
247 3
248 end
249 else begin
250 Bytes.unsafe_set buf pos (Char.chr 0x7a);
251 Bytes.unsafe_set buf (pos + 1) (Char.chr ((len lsr 24) land 0xff));
252 Bytes.unsafe_set buf (pos + 2) (Char.chr ((len lsr 16) land 0xff));
253 Bytes.unsafe_set buf (pos + 3) (Char.chr ((len lsr 8) land 0xff));
254 Bytes.unsafe_set buf (pos + 4) (Char.chr (len land 0xff));
255 5
256 end
257 in
258 Bytes.blit_string s 0 buf (pos + header_len) len;
259 enc.pos <- pos + header_len + len
260
261(** Write CBOR byte string (major type 2) *)
262let write_cbor_bytes enc data =
263 let len = Bytes.length data in
264 if len <= 23 then write_u8 enc (0x40 lor len)
265 else if len <= 0xff then begin
266 write_u8 enc 0x58;
267 write_u8 enc len
268 end
269 else if len <= 0xffff then begin
270 write_u8 enc 0x59;
271 write_u8 enc ((len lsr 8) land 0xff);
272 write_u8 enc (len land 0xff)
273 end
274 else begin
275 write_u8 enc 0x5a;
276 write_u8 enc ((len lsr 24) land 0xff);
277 write_u8 enc ((len lsr 16) land 0xff);
278 write_u8 enc ((len lsr 8) land 0xff);
279 write_u8 enc (len land 0xff)
280 end;
281 write_bytes enc data
282
283(** Write CBOR float64 (major type 7, additional 27) *)
284let write_cbor_float enc f =
285 write_u8 enc 0xfb;
286 let bits = Int64.bits_of_float f in
287 for i = 7 downto 0 do
288 write_u8 enc
289 (Int64.to_int (Int64.logand (Int64.shift_right bits (i * 8)) 0xffL))
290 done
291
292(** Write CBOR null *)
293let[@inline] write_cbor_null enc = write_u8 enc 0xf6
294
295(** Write CBOR undefined *)
296let[@inline] write_cbor_undefined enc = write_u8 enc 0xf7
297
298(** Write CBOR boolean *)
299let[@inline] write_cbor_bool enc b = write_u8 enc (if b then 0xf5 else 0xf4)
300
301(** Write CBOR array header (major type 4) *)
302let write_cbor_array_header enc len =
303 if len <= 23 then write_u8 enc (0x80 lor len)
304 else if len <= 0xff then begin
305 write_u8 enc 0x98;
306 write_u8 enc len
307 end
308 else if len <= 0xffff then begin
309 write_u8 enc 0x99;
310 write_u8 enc ((len lsr 8) land 0xff);
311 write_u8 enc (len land 0xff)
312 end
313 else begin
314 write_u8 enc 0x9a;
315 write_u8 enc ((len lsr 24) land 0xff);
316 write_u8 enc ((len lsr 16) land 0xff);
317 write_u8 enc ((len lsr 8) land 0xff);
318 write_u8 enc (len land 0xff)
319 end
320
321(** Write CBOR map header (major type 5) *)
322let write_cbor_map_header enc len =
323 if len <= 23 then write_u8 enc (0xa0 lor len)
324 else if len <= 0xff then begin
325 write_u8 enc 0xb8;
326 write_u8 enc len
327 end
328 else if len <= 0xffff then begin
329 write_u8 enc 0xb9;
330 write_u8 enc ((len lsr 8) land 0xff);
331 write_u8 enc (len land 0xff)
332 end
333 else begin
334 write_u8 enc 0xba;
335 write_u8 enc ((len lsr 24) land 0xff);
336 write_u8 enc ((len lsr 16) land 0xff);
337 write_u8 enc ((len lsr 8) land 0xff);
338 write_u8 enc (len land 0xff)
339 end
340
341(** Write a complete Value.t as CBOR via Cbor_simd *)
342let write_cbor_value enc (v : Value.t) =
343 let cbor_bytes = Cbor_simd.encode v in
344 write_bytes enc cbor_bytes
345
346(** {1 Decoder} *)
347
348type decoder = { data : bytes; mutable pos : int; len : int }
349(** Mutable decoder state *)
350
351(** Create a decoder from bytes *)
352let create_decoder data = { data; pos = 0; len = Bytes.length data }
353
354(** Create a decoder from string *)
355let create_decoder_string s =
356 { data = Bytes.of_string s; pos = 0; len = String.length s }
357
358(** Get remaining bytes in decoder *)
359let decoder_remaining dec = dec.len - dec.pos
360
361(** Check if decoder has more data *)
362let decoder_has_more dec = dec.pos < dec.len
363
364(** Get current decoder position *)
365let decoder_pos dec = dec.pos
366
367(** Set decoder position (for seeking) *)
368let set_decoder_pos dec pos = dec.pos <- pos
369
370(** {2 Primitive Readers} *)
371
372(** Read a single byte *)
373let read_u8 dec =
374 if dec.pos >= dec.len then failwith "read_u8: unexpected end";
375 let b = Bytes.get_uint8 dec.data dec.pos in
376 dec.pos <- dec.pos + 1;
377 b
378
379(** Peek at next byte without consuming *)
380let peek_u8 dec =
381 if dec.pos >= dec.len then failwith "peek_u8: unexpected end";
382 Bytes.get_uint8 dec.data dec.pos
383
384(** Read 32-bit unsigned integer in big-endian *)
385let read_u32 dec =
386 if dec.pos + 4 > dec.len then failwith "read_u32: unexpected end";
387 let n = Bytes.get_int32_be dec.data dec.pos in
388 dec.pos <- dec.pos + 4;
389 Int32.to_int n
390
391(** Read n bytes *)
392let read_bytes dec n =
393 if dec.pos + n > dec.len then failwith "read_bytes: unexpected end";
394 let result = Bytes.sub dec.data dec.pos n in
395 dec.pos <- dec.pos + n;
396 result
397
398(** Read n bytes as string *)
399let read_string dec n = Bytes.to_string (read_bytes dec n)
400
401(** {2 Variable-Length Integer Decoding} *)
402
403(** Read variable-length unsigned integer (up to 57 bits) *)
404let read_vu57 dec =
405 let rec loop result shift =
406 let b = read_u8 dec in
407 let value = b land 0x7f in
408 let result = result lor (value lsl shift) in
409 if b < 0x80 then result else loop result (shift + 7)
410 in
411 loop 0 0
412
413(** Read 1-bit flag + variable-length unsigned integer (up to 56 bits) *)
414let read_b1vu56 dec =
415 let byte = read_u8 dec in
416 let flag = byte land 0x80 <> 0 in
417 let first_value = byte land 0x3f in
418 let has_continuation = byte land 0x40 <> 0 in
419 if not has_continuation then (flag, first_value)
420 else
421 let rec loop result shift =
422 let b = read_u8 dec in
423 let value = b land 0x7f in
424 let result = result lor (value lsl shift) in
425 if b < 0x80 then result else loop result (shift + 7)
426 in
427 let rest = loop 0 0 in
428 (flag, first_value lor (rest lsl 6))
429
430(** Read CRDT ID in compact form. Returns (session_index, time_diff) *)
431let read_id dec =
432 let byte = read_u8 dec in
433 if byte <= 127 then (byte lsr 4, byte land 0x0f)
434 else begin
435 dec.pos <- dec.pos - 1;
436 let _, x = read_b1vu56 dec in
437 let y = read_vu57 dec in
438 (x, y)
439 end
440
441(** {2 CBOR Decoding} *)
442
443(** Read CBOR length/value based on additional info byte *)
444let read_cbor_length dec additional =
445 if additional <= 23 then additional
446 else if additional = 24 then read_u8 dec
447 else if additional = 25 then
448 let b1 = read_u8 dec in
449 let b2 = read_u8 dec in
450 (b1 lsl 8) lor b2
451 else if additional = 26 then
452 let b1 = read_u8 dec in
453 let b2 = read_u8 dec in
454 let b3 = read_u8 dec in
455 let b4 = read_u8 dec in
456 (b1 lsl 24) lor (b2 lsl 16) lor (b3 lsl 8) lor b4
457 else if additional = 27 then begin
458 let bytes = read_bytes dec 8 in
459 let b0 = Bytes.get_uint8 bytes 0 in
460 let b1 = Bytes.get_uint8 bytes 1 in
461 let b2 = Bytes.get_uint8 bytes 2 in
462 let b3 = Bytes.get_uint8 bytes 3 in
463 let b4 = Bytes.get_uint8 bytes 4 in
464 let b5 = Bytes.get_uint8 bytes 5 in
465 let b6 = Bytes.get_uint8 bytes 6 in
466 let b7 = Bytes.get_uint8 bytes 7 in
467 (b0 lsl 56) lor (b1 lsl 48) lor (b2 lsl 40) lor (b3 lsl 32) lor (b4 lsl 24)
468 lor (b5 lsl 16) lor (b6 lsl 8) lor b7
469 end
470 else failwith "read_cbor_length: unsupported additional info"
471
472(** Read CBOR float64 *)
473let read_cbor_float dec =
474 let bytes = read_bytes dec 8 in
475 let bits =
476 Int64.logor
477 (Int64.logor
478 (Int64.logor
479 (Int64.shift_left (Int64.of_int (Bytes.get_uint8 bytes 0)) 56)
480 (Int64.shift_left (Int64.of_int (Bytes.get_uint8 bytes 1)) 48))
481 (Int64.logor
482 (Int64.shift_left (Int64.of_int (Bytes.get_uint8 bytes 2)) 40)
483 (Int64.shift_left (Int64.of_int (Bytes.get_uint8 bytes 3)) 32)))
484 (Int64.logor
485 (Int64.logor
486 (Int64.shift_left (Int64.of_int (Bytes.get_uint8 bytes 4)) 24)
487 (Int64.shift_left (Int64.of_int (Bytes.get_uint8 bytes 5)) 16))
488 (Int64.logor
489 (Int64.shift_left (Int64.of_int (Bytes.get_uint8 bytes 6)) 8)
490 (Int64.of_int (Bytes.get_uint8 bytes 7))))
491 in
492 Int64.float_of_bits bits
493
494(** Read a complete CBOR value as Value.t via Cbor_simd.
495
496 Note: We need to read the CBOR value starting at the current decoder
497 position, then advance the decoder past the consumed bytes. Since
498 Cbor_simd's decoder tracks its own position, we create a sub-decoder and
499 sync positions after. *)
500let read_cbor_value dec : Value.t =
501 let start_pos = dec.pos in
502 let remaining = Bytes.sub dec.data start_pos (dec.len - start_pos) in
503 let cbor_dec = Cbor_simd.create_decoder_bytes remaining in
504 let value = Cbor_simd.read_value cbor_dec in
505 dec.pos <- start_pos + Cbor_simd.decoder_pos cbor_dec;
506 value
507
508(** Read a CBOR string (expects major type 3) *)
509let read_cbor_string_only dec =
510 let byte = read_u8 dec in
511 let major = byte lsr 5 in
512 let additional = byte land 0x1f in
513 if major <> 3 then failwith "read_cbor_string_only: expected string";
514 let len = read_cbor_length dec additional in
515 read_string dec len
516
517(** {1 Convenience Functions} *)
518
519(** Encode a Value.t to CBOR bytes *)
520let encode_cbor (v : Value.t) : bytes =
521 let enc = create_encoder () in
522 write_cbor_value enc v;
523 encoder_contents enc
524
525(** Encode a Value.t to CBOR string *)
526let encode_cbor_string (v : Value.t) : string =
527 let enc = create_encoder () in
528 write_cbor_value enc v;
529 encoder_contents_string enc
530
531(** Decode CBOR bytes to Value.t *)
532let decode_cbor (data : bytes) : Value.t =
533 let dec = create_decoder data in
534 read_cbor_value dec
535
536(** Decode CBOR string to Value.t *)
537let decode_cbor_string (s : string) : Value.t =
538 let dec = create_decoder_string s in
539 read_cbor_value dec