crdt library in ocaml implementing json-joy
at main 18 kB view raw
1(** CBOR and binary encoding/decoding primitives. 2 3 This module provides low-level encoding and decoding functions for: 4 - CBOR (RFC 7049) value encoding/decoding 5 - Variable-length integers (vu57, b1vu56) 6 - Compact ID encoding for CRDT timestamps 7 8 The implementation is optimized for performance with: 9 - Inline annotations on hot paths 10 - Unsafe byte operations where safe 11 - Pre-allocated buffers with growth strategy 12 - Batched capacity checks *) 13 14(** {1 Encoder} *) 15 16type encoder = { mutable buf : bytes; mutable pos : int; mutable cap : int } 17(** Mutable encoder with auto-growing buffer *) 18 19(** Create a new encoder with optional initial capacity (default 4096) *) 20let create_encoder ?(capacity = 4096) () = 21 { buf = Bytes.create capacity; pos = 0; cap = capacity } 22 23(** Reset encoder position to reuse buffer *) 24let reset_encoder enc = enc.pos <- 0 25 26(** Grow the encoder buffer to accommodate at least [min_additional] more bytes 27*) 28let grow enc min_additional = 29 let new_cap = max (enc.cap * 2) (enc.pos + min_additional) in 30 let new_buf = Bytes.create new_cap in 31 Bytes.blit enc.buf 0 new_buf 0 enc.pos; 32 enc.buf <- new_buf; 33 enc.cap <- new_cap 34 35(** Ensure capacity for n more bytes *) 36let[@inline] ensure_capacity enc n = if enc.pos + n > enc.cap then grow enc n 37 38(** Get the encoded contents as bytes *) 39let encoder_contents enc = Bytes.sub enc.buf 0 enc.pos 40 41(** Get the encoded contents as string *) 42let encoder_contents_string enc = Bytes.sub_string enc.buf 0 enc.pos 43 44(** {2 Primitive Writers} *) 45 46(** Write a single byte *) 47let[@inline] write_u8 enc b = 48 if enc.pos >= enc.cap then grow enc 16; 49 Bytes.unsafe_set enc.buf enc.pos (Char.chr b); 50 enc.pos <- enc.pos + 1 51 52(** Write a 32-bit unsigned integer in big-endian *) 53let[@inline] write_u32 enc n = 54 if enc.pos + 4 > enc.cap then grow enc 16; 55 Bytes.set_int32_be enc.buf enc.pos (Int32.of_int n); 56 enc.pos <- enc.pos + 4 57 58(** Set a 32-bit value at a specific offset (for backpatching) *) 59let[@inline] set_u32 enc offset n = 60 Bytes.set_int32_be enc.buf offset (Int32.of_int n) 61 62(** Write raw bytes *) 63let write_bytes enc data = 64 let len = Bytes.length data in 65 if enc.pos + len > enc.cap then grow enc len; 66 Bytes.blit data 0 enc.buf enc.pos len; 67 enc.pos <- enc.pos + len 68 69(** Write a string as raw bytes *) 70let write_string enc s = 71 let len = String.length s in 72 if enc.pos + len > enc.cap then grow enc len; 73 Bytes.blit_string s 0 enc.buf enc.pos len; 74 enc.pos <- enc.pos + len 75 76(** {2 Variable-Length Integer Encoding} *) 77 78(** Encode variable-length unsigned integer up to 57 bits (8 bytes max). 79 80 Format: each byte uses 7 bits for value, high bit indicates continuation. 81 {v 82 byte 1 byte 2 ... byte 8 83 ?zzzzzzz ?zzzzzzz zzzzzzzz 84 v} 85 where ? = 1 means more bytes follow, ? = 0 means last byte *) 86let[@inline] write_vu57 enc num = 87 if enc.pos + 10 > enc.cap then grow enc 16; 88 let buf = enc.buf in 89 let rec write_loop pos n = 90 if n <= 0x7f then begin 91 Bytes.unsafe_set buf pos (Char.chr n); 92 pos + 1 93 end 94 else begin 95 Bytes.unsafe_set buf pos (Char.chr (0x80 lor (n land 0x7f))); 96 write_loop (pos + 1) (n lsr 7) 97 end 98 in 99 enc.pos <- write_loop enc.pos num 100 101(** Encode 1-bit flag + up to 56-bit value. 102 103 Format: first byte uses 6 bits for value, then continuation like vu57. 104 {v 105 byte 1 byte 2 ... 106 f?zzzzzz ?zzzzzzz 107 v} 108 where f = flag bit, ? = continuation bit *) 109let[@inline] write_b1vu56 enc flag num = 110 if enc.pos + 10 > enc.cap then grow enc 16; 111 let buf = enc.buf in 112 let flag_bit = if flag then 0x80 else 0 in 113 if num <= 0x3f then begin 114 Bytes.unsafe_set buf enc.pos (Char.chr (flag_bit lor num)); 115 enc.pos <- enc.pos + 1 116 end 117 else begin 118 Bytes.unsafe_set buf enc.pos 119 (Char.chr (flag_bit lor 0x40 lor (num land 0x3f))); 120 let rec write_rest pos n = 121 if n <= 0x7f then begin 122 Bytes.unsafe_set buf pos (Char.chr n); 123 pos + 1 124 end 125 else begin 126 Bytes.unsafe_set buf pos (Char.chr (0x80 lor (n land 0x7f))); 127 write_rest (pos + 1) (n lsr 7) 128 end 129 in 130 enc.pos <- write_rest (enc.pos + 1) (num lsr 6) 131 end 132 133(** Write CRDT ID in compact form. 134 135 If session_index <= 7 and time_diff <= 15: single byte (0xxxyyyy) Otherwise: 136 b1vu56(1, session_index) + vu57(time_diff) *) 137let[@inline] write_id enc x y = 138 if x <= 7 && y <= 15 then begin 139 if enc.pos >= enc.cap then grow enc 16; 140 Bytes.unsafe_set enc.buf enc.pos (Char.chr ((x lsl 4) lor y)); 141 enc.pos <- enc.pos + 1 142 end 143 else begin 144 write_b1vu56 enc true x; 145 write_vu57 enc y 146 end 147 148(** {2 CBOR Encoding} *) 149 150(** Write CBOR unsigned integer (major type 0) *) 151let[@inline] write_cbor_uint enc n = 152 if enc.pos + 9 > enc.cap then grow enc 16; 153 let buf = enc.buf in 154 let pos = enc.pos in 155 if n <= 23 then begin 156 Bytes.unsafe_set buf pos (Char.chr n); 157 enc.pos <- pos + 1 158 end 159 else if n <= 0xff then begin 160 Bytes.unsafe_set buf pos (Char.chr 0x18); 161 Bytes.unsafe_set buf (pos + 1) (Char.chr n); 162 enc.pos <- pos + 2 163 end 164 else if n <= 0xffff then begin 165 Bytes.unsafe_set buf pos (Char.chr 0x19); 166 Bytes.unsafe_set buf (pos + 1) (Char.chr ((n lsr 8) land 0xff)); 167 Bytes.unsafe_set buf (pos + 2) (Char.chr (n land 0xff)); 168 enc.pos <- pos + 3 169 end 170 else if n <= 0xffffffff then begin 171 Bytes.unsafe_set buf pos (Char.chr 0x1a); 172 Bytes.unsafe_set buf (pos + 1) (Char.chr ((n lsr 24) land 0xff)); 173 Bytes.unsafe_set buf (pos + 2) (Char.chr ((n lsr 16) land 0xff)); 174 Bytes.unsafe_set buf (pos + 3) (Char.chr ((n lsr 8) land 0xff)); 175 Bytes.unsafe_set buf (pos + 4) (Char.chr (n land 0xff)); 176 enc.pos <- pos + 5 177 end 178 else begin 179 Bytes.unsafe_set buf pos (Char.chr 0x1b); 180 Bytes.unsafe_set buf (pos + 1) (Char.chr ((n lsr 56) land 0xff)); 181 Bytes.unsafe_set buf (pos + 2) (Char.chr ((n lsr 48) land 0xff)); 182 Bytes.unsafe_set buf (pos + 3) (Char.chr ((n lsr 40) land 0xff)); 183 Bytes.unsafe_set buf (pos + 4) (Char.chr ((n lsr 32) land 0xff)); 184 Bytes.unsafe_set buf (pos + 5) (Char.chr ((n lsr 24) land 0xff)); 185 Bytes.unsafe_set buf (pos + 6) (Char.chr ((n lsr 16) land 0xff)); 186 Bytes.unsafe_set buf (pos + 7) (Char.chr ((n lsr 8) land 0xff)); 187 Bytes.unsafe_set buf (pos + 8) (Char.chr (n land 0xff)); 188 enc.pos <- pos + 9 189 end 190 191(** Write CBOR negative integer (major type 1) *) 192let write_cbor_negint enc n = 193 let encoded = -1 - n in 194 if encoded <= 23 then write_u8 enc (0x20 lor encoded) 195 else if encoded <= 0xff then begin 196 write_u8 enc 0x38; 197 write_u8 enc encoded 198 end 199 else if encoded <= 0xffff then begin 200 write_u8 enc 0x39; 201 write_u8 enc ((encoded lsr 8) land 0xff); 202 write_u8 enc (encoded land 0xff) 203 end 204 else if encoded <= 0xffffffff then begin 205 write_u8 enc 0x3a; 206 write_u8 enc ((encoded lsr 24) land 0xff); 207 write_u8 enc ((encoded lsr 16) land 0xff); 208 write_u8 enc ((encoded lsr 8) land 0xff); 209 write_u8 enc (encoded land 0xff) 210 end 211 else begin 212 write_u8 enc 0x3b; 213 write_u8 enc ((encoded lsr 56) land 0xff); 214 write_u8 enc ((encoded lsr 48) land 0xff); 215 write_u8 enc ((encoded lsr 40) land 0xff); 216 write_u8 enc ((encoded lsr 32) land 0xff); 217 write_u8 enc ((encoded lsr 24) land 0xff); 218 write_u8 enc ((encoded lsr 16) land 0xff); 219 write_u8 enc ((encoded lsr 8) land 0xff); 220 write_u8 enc (encoded land 0xff) 221 end 222 223(** Write CBOR integer (chooses unsigned or negative encoding) *) 224let[@inline] write_cbor_int enc n = 225 if n >= 0 then write_cbor_uint enc n else write_cbor_negint enc n 226 227(** Write CBOR text string (major type 3) *) 228let[@inline] write_cbor_string enc s = 229 let len = String.length s in 230 if enc.pos + len + 5 > enc.cap then grow enc (len + 8); 231 let buf = enc.buf in 232 let pos = enc.pos in 233 let header_len = 234 if len <= 23 then begin 235 Bytes.unsafe_set buf pos (Char.chr (0x60 lor len)); 236 1 237 end 238 else if len <= 0xff then begin 239 Bytes.unsafe_set buf pos (Char.chr 0x78); 240 Bytes.unsafe_set buf (pos + 1) (Char.chr len); 241 2 242 end 243 else if len <= 0xffff then begin 244 Bytes.unsafe_set buf pos (Char.chr 0x79); 245 Bytes.unsafe_set buf (pos + 1) (Char.chr ((len lsr 8) land 0xff)); 246 Bytes.unsafe_set buf (pos + 2) (Char.chr (len land 0xff)); 247 3 248 end 249 else begin 250 Bytes.unsafe_set buf pos (Char.chr 0x7a); 251 Bytes.unsafe_set buf (pos + 1) (Char.chr ((len lsr 24) land 0xff)); 252 Bytes.unsafe_set buf (pos + 2) (Char.chr ((len lsr 16) land 0xff)); 253 Bytes.unsafe_set buf (pos + 3) (Char.chr ((len lsr 8) land 0xff)); 254 Bytes.unsafe_set buf (pos + 4) (Char.chr (len land 0xff)); 255 5 256 end 257 in 258 Bytes.blit_string s 0 buf (pos + header_len) len; 259 enc.pos <- pos + header_len + len 260 261(** Write CBOR byte string (major type 2) *) 262let write_cbor_bytes enc data = 263 let len = Bytes.length data in 264 if len <= 23 then write_u8 enc (0x40 lor len) 265 else if len <= 0xff then begin 266 write_u8 enc 0x58; 267 write_u8 enc len 268 end 269 else if len <= 0xffff then begin 270 write_u8 enc 0x59; 271 write_u8 enc ((len lsr 8) land 0xff); 272 write_u8 enc (len land 0xff) 273 end 274 else begin 275 write_u8 enc 0x5a; 276 write_u8 enc ((len lsr 24) land 0xff); 277 write_u8 enc ((len lsr 16) land 0xff); 278 write_u8 enc ((len lsr 8) land 0xff); 279 write_u8 enc (len land 0xff) 280 end; 281 write_bytes enc data 282 283(** Write CBOR float64 (major type 7, additional 27) *) 284let write_cbor_float enc f = 285 write_u8 enc 0xfb; 286 let bits = Int64.bits_of_float f in 287 for i = 7 downto 0 do 288 write_u8 enc 289 (Int64.to_int (Int64.logand (Int64.shift_right bits (i * 8)) 0xffL)) 290 done 291 292(** Write CBOR null *) 293let[@inline] write_cbor_null enc = write_u8 enc 0xf6 294 295(** Write CBOR undefined *) 296let[@inline] write_cbor_undefined enc = write_u8 enc 0xf7 297 298(** Write CBOR boolean *) 299let[@inline] write_cbor_bool enc b = write_u8 enc (if b then 0xf5 else 0xf4) 300 301(** Write CBOR array header (major type 4) *) 302let write_cbor_array_header enc len = 303 if len <= 23 then write_u8 enc (0x80 lor len) 304 else if len <= 0xff then begin 305 write_u8 enc 0x98; 306 write_u8 enc len 307 end 308 else if len <= 0xffff then begin 309 write_u8 enc 0x99; 310 write_u8 enc ((len lsr 8) land 0xff); 311 write_u8 enc (len land 0xff) 312 end 313 else begin 314 write_u8 enc 0x9a; 315 write_u8 enc ((len lsr 24) land 0xff); 316 write_u8 enc ((len lsr 16) land 0xff); 317 write_u8 enc ((len lsr 8) land 0xff); 318 write_u8 enc (len land 0xff) 319 end 320 321(** Write CBOR map header (major type 5) *) 322let write_cbor_map_header enc len = 323 if len <= 23 then write_u8 enc (0xa0 lor len) 324 else if len <= 0xff then begin 325 write_u8 enc 0xb8; 326 write_u8 enc len 327 end 328 else if len <= 0xffff then begin 329 write_u8 enc 0xb9; 330 write_u8 enc ((len lsr 8) land 0xff); 331 write_u8 enc (len land 0xff) 332 end 333 else begin 334 write_u8 enc 0xba; 335 write_u8 enc ((len lsr 24) land 0xff); 336 write_u8 enc ((len lsr 16) land 0xff); 337 write_u8 enc ((len lsr 8) land 0xff); 338 write_u8 enc (len land 0xff) 339 end 340 341(** Write a complete Value.t as CBOR via Cbor_simd *) 342let write_cbor_value enc (v : Value.t) = 343 let cbor_bytes = Cbor_simd.encode v in 344 write_bytes enc cbor_bytes 345 346(** {1 Decoder} *) 347 348type decoder = { data : bytes; mutable pos : int; len : int } 349(** Mutable decoder state *) 350 351(** Create a decoder from bytes *) 352let create_decoder data = { data; pos = 0; len = Bytes.length data } 353 354(** Create a decoder from string *) 355let create_decoder_string s = 356 { data = Bytes.of_string s; pos = 0; len = String.length s } 357 358(** Get remaining bytes in decoder *) 359let decoder_remaining dec = dec.len - dec.pos 360 361(** Check if decoder has more data *) 362let decoder_has_more dec = dec.pos < dec.len 363 364(** Get current decoder position *) 365let decoder_pos dec = dec.pos 366 367(** Set decoder position (for seeking) *) 368let set_decoder_pos dec pos = dec.pos <- pos 369 370(** {2 Primitive Readers} *) 371 372(** Read a single byte *) 373let read_u8 dec = 374 if dec.pos >= dec.len then failwith "read_u8: unexpected end"; 375 let b = Bytes.get_uint8 dec.data dec.pos in 376 dec.pos <- dec.pos + 1; 377 b 378 379(** Peek at next byte without consuming *) 380let peek_u8 dec = 381 if dec.pos >= dec.len then failwith "peek_u8: unexpected end"; 382 Bytes.get_uint8 dec.data dec.pos 383 384(** Read 32-bit unsigned integer in big-endian *) 385let read_u32 dec = 386 if dec.pos + 4 > dec.len then failwith "read_u32: unexpected end"; 387 let n = Bytes.get_int32_be dec.data dec.pos in 388 dec.pos <- dec.pos + 4; 389 Int32.to_int n 390 391(** Read n bytes *) 392let read_bytes dec n = 393 if dec.pos + n > dec.len then failwith "read_bytes: unexpected end"; 394 let result = Bytes.sub dec.data dec.pos n in 395 dec.pos <- dec.pos + n; 396 result 397 398(** Read n bytes as string *) 399let read_string dec n = Bytes.to_string (read_bytes dec n) 400 401(** {2 Variable-Length Integer Decoding} *) 402 403(** Read variable-length unsigned integer (up to 57 bits) *) 404let read_vu57 dec = 405 let rec loop result shift = 406 let b = read_u8 dec in 407 let value = b land 0x7f in 408 let result = result lor (value lsl shift) in 409 if b < 0x80 then result else loop result (shift + 7) 410 in 411 loop 0 0 412 413(** Read 1-bit flag + variable-length unsigned integer (up to 56 bits) *) 414let read_b1vu56 dec = 415 let byte = read_u8 dec in 416 let flag = byte land 0x80 <> 0 in 417 let first_value = byte land 0x3f in 418 let has_continuation = byte land 0x40 <> 0 in 419 if not has_continuation then (flag, first_value) 420 else 421 let rec loop result shift = 422 let b = read_u8 dec in 423 let value = b land 0x7f in 424 let result = result lor (value lsl shift) in 425 if b < 0x80 then result else loop result (shift + 7) 426 in 427 let rest = loop 0 0 in 428 (flag, first_value lor (rest lsl 6)) 429 430(** Read CRDT ID in compact form. Returns (session_index, time_diff) *) 431let read_id dec = 432 let byte = read_u8 dec in 433 if byte <= 127 then (byte lsr 4, byte land 0x0f) 434 else begin 435 dec.pos <- dec.pos - 1; 436 let _, x = read_b1vu56 dec in 437 let y = read_vu57 dec in 438 (x, y) 439 end 440 441(** {2 CBOR Decoding} *) 442 443(** Read CBOR length/value based on additional info byte *) 444let read_cbor_length dec additional = 445 if additional <= 23 then additional 446 else if additional = 24 then read_u8 dec 447 else if additional = 25 then 448 let b1 = read_u8 dec in 449 let b2 = read_u8 dec in 450 (b1 lsl 8) lor b2 451 else if additional = 26 then 452 let b1 = read_u8 dec in 453 let b2 = read_u8 dec in 454 let b3 = read_u8 dec in 455 let b4 = read_u8 dec in 456 (b1 lsl 24) lor (b2 lsl 16) lor (b3 lsl 8) lor b4 457 else if additional = 27 then begin 458 let bytes = read_bytes dec 8 in 459 let b0 = Bytes.get_uint8 bytes 0 in 460 let b1 = Bytes.get_uint8 bytes 1 in 461 let b2 = Bytes.get_uint8 bytes 2 in 462 let b3 = Bytes.get_uint8 bytes 3 in 463 let b4 = Bytes.get_uint8 bytes 4 in 464 let b5 = Bytes.get_uint8 bytes 5 in 465 let b6 = Bytes.get_uint8 bytes 6 in 466 let b7 = Bytes.get_uint8 bytes 7 in 467 (b0 lsl 56) lor (b1 lsl 48) lor (b2 lsl 40) lor (b3 lsl 32) lor (b4 lsl 24) 468 lor (b5 lsl 16) lor (b6 lsl 8) lor b7 469 end 470 else failwith "read_cbor_length: unsupported additional info" 471 472(** Read CBOR float64 *) 473let read_cbor_float dec = 474 let bytes = read_bytes dec 8 in 475 let bits = 476 Int64.logor 477 (Int64.logor 478 (Int64.logor 479 (Int64.shift_left (Int64.of_int (Bytes.get_uint8 bytes 0)) 56) 480 (Int64.shift_left (Int64.of_int (Bytes.get_uint8 bytes 1)) 48)) 481 (Int64.logor 482 (Int64.shift_left (Int64.of_int (Bytes.get_uint8 bytes 2)) 40) 483 (Int64.shift_left (Int64.of_int (Bytes.get_uint8 bytes 3)) 32))) 484 (Int64.logor 485 (Int64.logor 486 (Int64.shift_left (Int64.of_int (Bytes.get_uint8 bytes 4)) 24) 487 (Int64.shift_left (Int64.of_int (Bytes.get_uint8 bytes 5)) 16)) 488 (Int64.logor 489 (Int64.shift_left (Int64.of_int (Bytes.get_uint8 bytes 6)) 8) 490 (Int64.of_int (Bytes.get_uint8 bytes 7)))) 491 in 492 Int64.float_of_bits bits 493 494(** Read a complete CBOR value as Value.t via Cbor_simd. 495 496 Note: We need to read the CBOR value starting at the current decoder 497 position, then advance the decoder past the consumed bytes. Since 498 Cbor_simd's decoder tracks its own position, we create a sub-decoder and 499 sync positions after. *) 500let read_cbor_value dec : Value.t = 501 let start_pos = dec.pos in 502 let remaining = Bytes.sub dec.data start_pos (dec.len - start_pos) in 503 let cbor_dec = Cbor_simd.create_decoder_bytes remaining in 504 let value = Cbor_simd.read_value cbor_dec in 505 dec.pos <- start_pos + Cbor_simd.decoder_pos cbor_dec; 506 value 507 508(** Read a CBOR string (expects major type 3) *) 509let read_cbor_string_only dec = 510 let byte = read_u8 dec in 511 let major = byte lsr 5 in 512 let additional = byte land 0x1f in 513 if major <> 3 then failwith "read_cbor_string_only: expected string"; 514 let len = read_cbor_length dec additional in 515 read_string dec len 516 517(** {1 Convenience Functions} *) 518 519(** Encode a Value.t to CBOR bytes *) 520let encode_cbor (v : Value.t) : bytes = 521 let enc = create_encoder () in 522 write_cbor_value enc v; 523 encoder_contents enc 524 525(** Encode a Value.t to CBOR string *) 526let encode_cbor_string (v : Value.t) : string = 527 let enc = create_encoder () in 528 write_cbor_value enc v; 529 encoder_contents_string enc 530 531(** Decode CBOR bytes to Value.t *) 532let decode_cbor (data : bytes) : Value.t = 533 let dec = create_decoder data in 534 read_cbor_value dec 535 536(** Decode CBOR string to Value.t *) 537let decode_cbor_string (s : string) : Value.t = 538 let dec = create_decoder_string s in 539 read_cbor_value dec