crdt library in ocaml implementing json-joy
1(** CBOR encoding/decoding using simdjsont with SIMD acceleration.
2
3 This module provides CBOR support for Value.t, extending simdjsont's CBOR
4 capabilities with byte string support (major type 2) and CBOR-specific
5 simple values (undefined).
6
7 Encoding uses simdjsont's optimized CBOR encoder where possible. Decoding
8 uses simdjsont's SIMD-accelerated CBOR parser. *)
9
10(** {1 Encoder} *)
11
12type encoder = { mutable buf : Buffer.t }
13(** Encoder state using Buffer for efficiency *)
14
15let create_encoder ?(capacity = 4096) () = { buf = Buffer.create capacity }
16let reset_encoder enc = Buffer.reset enc.buf
17let encoder_contents enc = Buffer.to_bytes enc.buf
18let encoder_contents_string enc = Buffer.contents enc.buf
19
20(** Write a single byte *)
21let[@inline] write_byte enc b = Buffer.add_char enc.buf (Char.unsafe_chr b)
22
23(** Write CBOR header with major type and length *)
24let write_head enc major len =
25 let major_bits = major lsl 5 in
26 if len < 24 then write_byte enc (major_bits lor len)
27 else if len <= 0xff then begin
28 write_byte enc (major_bits lor 24);
29 write_byte enc len
30 end
31 else if len <= 0xffff then begin
32 write_byte enc (major_bits lor 25);
33 write_byte enc ((len lsr 8) land 0xff);
34 write_byte enc (len land 0xff)
35 end
36 else if len <= 0xffffffff then begin
37 write_byte enc (major_bits lor 26);
38 write_byte enc ((len lsr 24) land 0xff);
39 write_byte enc ((len lsr 16) land 0xff);
40 write_byte enc ((len lsr 8) land 0xff);
41 write_byte enc (len land 0xff)
42 end
43 else begin
44 write_byte enc (major_bits lor 27);
45 write_byte enc ((len lsr 56) land 0xff);
46 write_byte enc ((len lsr 48) land 0xff);
47 write_byte enc ((len lsr 40) land 0xff);
48 write_byte enc ((len lsr 32) land 0xff);
49 write_byte enc ((len lsr 24) land 0xff);
50 write_byte enc ((len lsr 16) land 0xff);
51 write_byte enc ((len lsr 8) land 0xff);
52 write_byte enc (len land 0xff)
53 end
54
55(** Write CBOR unsigned integer (major type 0) *)
56let[@inline] write_uint enc n = write_head enc 0 n
57
58(** Write CBOR negative integer (major type 1) *)
59let write_negint enc n =
60 let encoded = -1 - n in
61 write_head enc 1 encoded
62
63(** Write CBOR integer (chooses unsigned or negative encoding) *)
64let[@inline] write_int enc n =
65 if n >= 0 then write_uint enc n else write_negint enc n
66
67(** Write CBOR byte string (major type 2) *)
68let write_bytes enc data =
69 let len = Bytes.length data in
70 write_head enc 2 len;
71 Buffer.add_bytes enc.buf data
72
73(** Write CBOR text string (major type 3) *)
74let write_string enc s =
75 let len = String.length s in
76 write_head enc 3 len;
77 Buffer.add_string enc.buf s
78
79(** Write CBOR array header (major type 4) *)
80let write_array_header enc len = write_head enc 4 len
81
82(** Write CBOR map header (major type 5) *)
83let write_map_header enc len = write_head enc 5 len
84
85(** Write CBOR float64 (major type 7, additional 27) *)
86let write_float enc f =
87 write_byte enc 0xfb;
88 let bits = Int64.bits_of_float f in
89 write_byte enc (Int64.to_int (Int64.shift_right_logical bits 56) land 0xff);
90 write_byte enc (Int64.to_int (Int64.shift_right_logical bits 48) land 0xff);
91 write_byte enc (Int64.to_int (Int64.shift_right_logical bits 40) land 0xff);
92 write_byte enc (Int64.to_int (Int64.shift_right_logical bits 32) land 0xff);
93 write_byte enc (Int64.to_int (Int64.shift_right_logical bits 24) land 0xff);
94 write_byte enc (Int64.to_int (Int64.shift_right_logical bits 16) land 0xff);
95 write_byte enc (Int64.to_int (Int64.shift_right_logical bits 8) land 0xff);
96 write_byte enc (Int64.to_int bits land 0xff)
97
98(** Write CBOR null *)
99let[@inline] write_null enc = write_byte enc 0xf6
100
101(** Write CBOR undefined *)
102let[@inline] write_undefined enc = write_byte enc 0xf7
103
104(** Write CBOR boolean *)
105let[@inline] write_bool enc b = write_byte enc (if b then 0xf5 else 0xf4)
106
107(** Write a complete Value.t as CBOR *)
108let rec write_value enc (v : Value.t) =
109 match v with
110 | Value.Null -> write_null enc
111 | Value.Undefined -> write_undefined enc
112 | Value.Bool b -> write_bool enc b
113 | Value.Int n -> write_int enc n
114 | Value.Float f -> write_float enc f
115 | Value.String s -> write_string enc s
116 | Value.Bytes b -> write_bytes enc b
117 | Value.Array items ->
118 write_array_header enc (List.length items);
119 List.iter (write_value enc) items
120 | Value.Object pairs ->
121 write_map_header enc (List.length pairs);
122 List.iter
123 (fun (k, v) ->
124 write_string enc k;
125 write_value enc v)
126 pairs
127 | Value.Timestamp_ref (sid, time) ->
128 (* Encode as 2-element array [sid, time] *)
129 write_byte enc 0x82;
130 write_uint enc sid;
131 write_uint enc time
132
133(** {1 Decoder} *)
134
135type decoder = { data : string; mutable pos : int; len : int }
136(** Decoder state *)
137
138let create_decoder s = { data = s; pos = 0; len = String.length s }
139let create_decoder_bytes b = create_decoder (Bytes.to_string b)
140let decoder_remaining dec = dec.len - dec.pos
141let decoder_has_more dec = dec.pos < dec.len
142let decoder_pos dec = dec.pos
143let set_decoder_pos dec pos = dec.pos <- pos
144
145(** Read a single byte *)
146let[@inline] read_byte dec =
147 if dec.pos >= dec.len then failwith "read_byte: unexpected end";
148 let b = Char.code (String.unsafe_get dec.data dec.pos) in
149 dec.pos <- dec.pos + 1;
150 b
151
152(** Peek at next byte without consuming *)
153let[@inline] peek_byte dec =
154 if dec.pos >= dec.len then failwith "peek_byte: unexpected end";
155 Char.code (String.unsafe_get dec.data dec.pos)
156
157(** Read CBOR length based on additional info *)
158let read_length dec additional =
159 if additional <= 23 then additional
160 else if additional = 24 then read_byte dec
161 else if additional = 25 then
162 let b1 = read_byte dec in
163 let b2 = read_byte dec in
164 (b1 lsl 8) lor b2
165 else if additional = 26 then
166 let b1 = read_byte dec in
167 let b2 = read_byte dec in
168 let b3 = read_byte dec in
169 let b4 = read_byte dec in
170 (b1 lsl 24) lor (b2 lsl 16) lor (b3 lsl 8) lor b4
171 else if additional = 27 then begin
172 let b0 = read_byte dec in
173 let b1 = read_byte dec in
174 let b2 = read_byte dec in
175 let b3 = read_byte dec in
176 let b4 = read_byte dec in
177 let b5 = read_byte dec in
178 let b6 = read_byte dec in
179 let b7 = read_byte dec in
180 (b0 lsl 56) lor (b1 lsl 48) lor (b2 lsl 40) lor (b3 lsl 32) lor (b4 lsl 24)
181 lor (b5 lsl 16) lor (b6 lsl 8) lor b7
182 end
183 else failwith "read_length: unsupported additional info"
184
185(** Read raw bytes from decoder *)
186let read_bytes_raw dec n =
187 if dec.pos + n > dec.len then failwith "read_bytes: unexpected end";
188 let result = Bytes.create n in
189 Bytes.blit_string dec.data dec.pos result 0 n;
190 dec.pos <- dec.pos + n;
191 result
192
193(** Read raw string from decoder *)
194let read_string_raw dec n =
195 if dec.pos + n > dec.len then failwith "read_string: unexpected end";
196 let result = String.sub dec.data dec.pos n in
197 dec.pos <- dec.pos + n;
198 result
199
200(** Read CBOR float64 *)
201let read_float dec =
202 let b0 = read_byte dec in
203 let b1 = read_byte dec in
204 let b2 = read_byte dec in
205 let b3 = read_byte dec in
206 let b4 = read_byte dec in
207 let b5 = read_byte dec in
208 let b6 = read_byte dec in
209 let b7 = read_byte dec in
210 let bits =
211 Int64.logor
212 (Int64.logor
213 (Int64.logor
214 (Int64.shift_left (Int64.of_int b0) 56)
215 (Int64.shift_left (Int64.of_int b1) 48))
216 (Int64.logor
217 (Int64.shift_left (Int64.of_int b2) 40)
218 (Int64.shift_left (Int64.of_int b3) 32)))
219 (Int64.logor
220 (Int64.logor
221 (Int64.shift_left (Int64.of_int b4) 24)
222 (Int64.shift_left (Int64.of_int b5) 16))
223 (Int64.logor (Int64.shift_left (Int64.of_int b6) 8) (Int64.of_int b7)))
224 in
225 Int64.float_of_bits bits
226
227(** Read CBOR float16 (half-precision) *)
228let read_float16 dec =
229 let b0 = read_byte dec in
230 let b1 = read_byte dec in
231 let half = (b0 lsl 8) lor b1 in
232 let sign = (half lsr 15) land 1 in
233 let exp = (half lsr 10) land 0x1f in
234 let mant = half land 0x3ff in
235 let f =
236 if exp = 0 then
237 (* Subnormal or zero *)
238 Float.ldexp (Float.of_int mant) (-24)
239 else if exp = 31 then
240 (* Infinity or NaN *)
241 if mant = 0 then Float.infinity else Float.nan
242 else
243 (* Normal number *)
244 Float.ldexp (Float.of_int (mant lor 0x400)) (exp - 25)
245 in
246 if sign = 1 then -.f else f
247
248(** Read CBOR float32 (single-precision) *)
249let read_float32 dec =
250 let b0 = read_byte dec in
251 let b1 = read_byte dec in
252 let b2 = read_byte dec in
253 let b3 = read_byte dec in
254 let bits =
255 Int32.logor
256 (Int32.logor
257 (Int32.shift_left (Int32.of_int b0) 24)
258 (Int32.shift_left (Int32.of_int b1) 16))
259 (Int32.logor (Int32.shift_left (Int32.of_int b2) 8) (Int32.of_int b3))
260 in
261 Int32.float_of_bits bits
262
263(** Read a complete CBOR value as Value.t *)
264let rec read_value dec : Value.t =
265 let byte = read_byte dec in
266 let major = byte lsr 5 in
267 let additional = byte land 0x1f in
268 match major with
269 | 0 ->
270 (* Unsigned integer *)
271 Value.Int (read_length dec additional)
272 | 1 ->
273 (* Negative integer *)
274 Value.Int (-1 - read_length dec additional)
275 | 2 ->
276 (* Byte string *)
277 let len = read_length dec additional in
278 Value.Bytes (read_bytes_raw dec len)
279 | 3 ->
280 (* Text string *)
281 let len = read_length dec additional in
282 Value.String (read_string_raw dec len)
283 | 4 ->
284 (* Array *)
285 let len = read_length dec additional in
286 let items = List.init len (fun _ -> read_value dec) in
287 Value.Array items
288 | 5 ->
289 (* Map *)
290 let len = read_length dec additional in
291 let pairs =
292 List.init len (fun _ ->
293 let key =
294 match read_value dec with
295 | Value.String s -> s
296 | _ -> failwith "CBOR map key must be string"
297 in
298 let value = read_value dec in
299 (key, value))
300 in
301 Value.Object pairs
302 | 6 ->
303 (* Tag - skip tag number and read value *)
304 let _ = read_length dec additional in
305 read_value dec
306 | 7 -> (
307 (* Simple values and floats *)
308 match additional with
309 | 20 -> Value.Bool false
310 | 21 -> Value.Bool true
311 | 22 -> Value.Null
312 | 23 -> Value.Undefined
313 | 25 -> Value.Float (read_float16 dec)
314 | 26 -> Value.Float (read_float32 dec)
315 | 27 -> Value.Float (read_float dec)
316 | _ -> Value.Undefined)
317 | _ -> failwith ("read_value: unknown major type " ^ string_of_int major)
318
319(** {1 Convenience Functions} *)
320
321let shared_encoder = create_encoder ~capacity:4096 ()
322
323let encode (v : Value.t) : bytes =
324 reset_encoder shared_encoder;
325 write_value shared_encoder v;
326 encoder_contents shared_encoder
327
328let encode_string (v : Value.t) : string =
329 reset_encoder shared_encoder;
330 write_value shared_encoder v;
331 encoder_contents_string shared_encoder
332
333(** Decode CBOR bytes to Value.t *)
334let decode (data : bytes) : Value.t =
335 let dec = create_decoder_bytes data in
336 read_value dec
337
338(** Decode CBOR string to Value.t *)
339let decode_string (s : string) : Value.t =
340 let dec = create_decoder s in
341 read_value dec
342
343(** {1 Low-level Access for Streaming} *)
344
345(** Read a CBOR string (expects major type 3) *)
346let read_string_only dec =
347 let byte = read_byte dec in
348 let major = byte lsr 5 in
349 let additional = byte land 0x1f in
350 if major <> 3 then failwith "read_string_only: expected string";
351 let len = read_length dec additional in
352 read_string_raw dec len
353
354(** Read CBOR unsigned integer *)
355let read_uint dec =
356 let byte = read_byte dec in
357 let major = byte lsr 5 in
358 let additional = byte land 0x1f in
359 if major <> 0 then failwith "read_uint: expected unsigned integer";
360 read_length dec additional
361
362(** {1 simdjsont Integration}
363
364 For JSON-compatible CBOR values, we can leverage simdjsont's
365 SIMD-accelerated parsing. This section provides conversion functions. *)
366
367(** Convert Value.t to simdjsont JSON (for JSON-compatible values only) *)
368let value_to_json (v : Value.t) : Simdjsont.Json.t option =
369 let rec conv = function
370 | Value.Null -> Some Simdjsont.Json.Null
371 | Value.Bool b -> Some (Simdjsont.Json.Bool b)
372 | Value.Int n -> Some (Simdjsont.Json.Int (Int64.of_int n))
373 | Value.Float f -> Some (Simdjsont.Json.Float f)
374 | Value.String s -> Some (Simdjsont.Json.String s)
375 | Value.Array items ->
376 let converted = List.filter_map conv items in
377 if List.length converted = List.length items then
378 Some (Simdjsont.Json.Array converted)
379 else None
380 | Value.Object pairs ->
381 let converted =
382 List.filter_map
383 (fun (k, v) ->
384 match conv v with Some jv -> Some (k, jv) | None -> None)
385 pairs
386 in
387 if List.length converted = List.length pairs then
388 Some (Simdjsont.Json.Object converted)
389 else None
390 | Value.Undefined | Value.Bytes _ | Value.Timestamp_ref _ -> None
391 in
392 conv v
393
394(** Convert simdjsont JSON to Value.t *)
395let json_to_value (j : Simdjsont.Json.t) : Value.t =
396 let rec conv = function
397 | Simdjsont.Json.Null -> Value.Null
398 | Simdjsont.Json.Bool b -> Value.Bool b
399 | Simdjsont.Json.Int i ->
400 if i >= Int64.of_int Int.min_int && i <= Int64.of_int Int.max_int then
401 Value.Int (Int64.to_int i)
402 else Value.Float (Int64.to_float i)
403 | Simdjsont.Json.Float f ->
404 if
405 (* Try to convert whole floats to int *)
406 Float.is_integer f
407 && f >= Float.of_int Int.min_int
408 && f <= Float.of_int Int.max_int
409 then Value.Int (Float.to_int f)
410 else Value.Float f
411 | Simdjsont.Json.String s -> Value.String s
412 | Simdjsont.Json.Array items -> Value.Array (List.map conv items)
413 | Simdjsont.Json.Object pairs ->
414 Value.Object (List.map (fun (k, v) -> (k, conv v)) pairs)
415 in
416 conv j
417
418(** Encode JSON-compatible Value.t using simdjsont's CBOR encoder *)
419let encode_via_simdjsont (v : Value.t) : string option =
420 match value_to_json v with
421 | Some json -> Some (Simdjsont.Cbor.encode_string Simdjsont.Codec.value json)
422 | None -> None
423
424(** Decode CBOR to Value.t using simdjsont (JSON-compatible values only) *)
425let decode_via_simdjsont (s : string) : (Value.t, string) result =
426 match Simdjsont.Cbor.decode_string Simdjsont.Codec.value s with
427 | Ok json -> Ok (json_to_value json)
428 | Error e -> Error e